1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 STATISTIC(NumTailCalls, "Number of tail calls");
125 STATISTIC(NumSiblingCalls, "Number of sibling calls");
126 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
127 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
128 
129 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
130 
131 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
132 
133 // FIXME: Remove this once the bug has been fixed!
134 extern cl::opt<bool> ANDIGlueBug;
135 
136 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
137                                      const PPCSubtarget &STI)
138     : TargetLowering(TM), Subtarget(STI) {
139   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140   // arguments are at least 4/8 bytes aligned.
141   bool isPPC64 = Subtarget.isPPC64();
142   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
143 
144   // Set up the register classes.
145   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
146   if (!useSoftFloat()) {
147     if (hasSPE()) {
148       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
149       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
150     } else {
151       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
152       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
153     }
154   }
155 
156   // Match BITREVERSE to customized fast code sequence in the td file.
157   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
158   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
159 
160   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
162 
163   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164   for (MVT VT : MVT::integer_valuetypes()) {
165     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
167   }
168 
169   if (Subtarget.isISA3_0()) {
170     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
171     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
172     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
173     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
174   } else {
175     // No extending loads from f16 or HW conversions back and forth.
176     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
177     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
178     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
179     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
180     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
181     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
182     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
183     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
184   }
185 
186   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
187 
188   // PowerPC has pre-inc load and store's.
189   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
190   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
191   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
192   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
193   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
194   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
195   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
196   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
197   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
198   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
199   if (!Subtarget.hasSPE()) {
200     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
201     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
202     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
203     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
204   }
205 
206   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
207   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
208   for (MVT VT : ScalarIntVTs) {
209     setOperationAction(ISD::ADDC, VT, Legal);
210     setOperationAction(ISD::ADDE, VT, Legal);
211     setOperationAction(ISD::SUBC, VT, Legal);
212     setOperationAction(ISD::SUBE, VT, Legal);
213   }
214 
215   if (Subtarget.useCRBits()) {
216     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
217 
218     if (isPPC64 || Subtarget.hasFPCVT()) {
219       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
220       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
221                         isPPC64 ? MVT::i64 : MVT::i32);
222       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
223       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
224                         isPPC64 ? MVT::i64 : MVT::i32);
225 
226       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
227       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
228                          isPPC64 ? MVT::i64 : MVT::i32);
229       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
230       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
231                         isPPC64 ? MVT::i64 : MVT::i32);
232     } else {
233       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
234       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
235       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
236       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
237     }
238 
239     // PowerPC does not support direct load/store of condition registers.
240     setOperationAction(ISD::LOAD, MVT::i1, Custom);
241     setOperationAction(ISD::STORE, MVT::i1, Custom);
242 
243     // FIXME: Remove this once the ANDI glue bug is fixed:
244     if (ANDIGlueBug)
245       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
246 
247     for (MVT VT : MVT::integer_valuetypes()) {
248       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
249       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
250       setTruncStoreAction(VT, MVT::i1, Expand);
251     }
252 
253     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
254   }
255 
256   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
257   // PPC (the libcall is not available).
258   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
259   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
260   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
261   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
262 
263   // We do not currently implement these libm ops for PowerPC.
264   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
265   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
266   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
267   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
268   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
269   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
270 
271   // PowerPC has no SREM/UREM instructions unless we are on P9
272   // On P9 we may use a hardware instruction to compute the remainder.
273   // When the result of both the remainder and the division is required it is
274   // more efficient to compute the remainder from the result of the division
275   // rather than use the remainder instruction. The instructions are legalized
276   // directly because the DivRemPairsPass performs the transformation at the IR
277   // level.
278   if (Subtarget.isISA3_0()) {
279     setOperationAction(ISD::SREM, MVT::i32, Legal);
280     setOperationAction(ISD::UREM, MVT::i32, Legal);
281     setOperationAction(ISD::SREM, MVT::i64, Legal);
282     setOperationAction(ISD::UREM, MVT::i64, Legal);
283   } else {
284     setOperationAction(ISD::SREM, MVT::i32, Expand);
285     setOperationAction(ISD::UREM, MVT::i32, Expand);
286     setOperationAction(ISD::SREM, MVT::i64, Expand);
287     setOperationAction(ISD::UREM, MVT::i64, Expand);
288   }
289 
290   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
291   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
292   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
293   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
294   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
295   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
296   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
297   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
298   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
299 
300   // Handle constrained floating-point operations of scalar.
301   // TODO: Handle SPE specific operation.
302   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
303   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
304   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
305   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
306   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
307   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
308 
309   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
310   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
311   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
312   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
313   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
314   if (Subtarget.hasVSX()) {
315     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
316     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
317   }
318 
319   if (Subtarget.hasFSQRT()) {
320     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
321     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
322   }
323 
324   if (Subtarget.hasFPRND()) {
325     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
326     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
327     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
328     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
329 
330     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
331     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
332     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
333     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
334   }
335 
336   // We don't support sin/cos/sqrt/fmod/pow
337   setOperationAction(ISD::FSIN , MVT::f64, Expand);
338   setOperationAction(ISD::FCOS , MVT::f64, Expand);
339   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
340   setOperationAction(ISD::FREM , MVT::f64, Expand);
341   setOperationAction(ISD::FPOW , MVT::f64, Expand);
342   setOperationAction(ISD::FSIN , MVT::f32, Expand);
343   setOperationAction(ISD::FCOS , MVT::f32, Expand);
344   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
345   setOperationAction(ISD::FREM , MVT::f32, Expand);
346   setOperationAction(ISD::FPOW , MVT::f32, Expand);
347   if (Subtarget.hasSPE()) {
348     setOperationAction(ISD::FMA  , MVT::f64, Expand);
349     setOperationAction(ISD::FMA  , MVT::f32, Expand);
350   } else {
351     setOperationAction(ISD::FMA  , MVT::f64, Legal);
352     setOperationAction(ISD::FMA  , MVT::f32, Legal);
353   }
354 
355   if (Subtarget.hasSPE())
356     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
357 
358   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
359 
360   // If we're enabling GP optimizations, use hardware square root
361   if (!Subtarget.hasFSQRT() &&
362       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
363         Subtarget.hasFRE()))
364     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
365 
366   if (!Subtarget.hasFSQRT() &&
367       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
368         Subtarget.hasFRES()))
369     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
370 
371   if (Subtarget.hasFCPSGN()) {
372     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
373     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
374   } else {
375     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
376     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
377   }
378 
379   if (Subtarget.hasFPRND()) {
380     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
381     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
382     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
383     setOperationAction(ISD::FROUND, MVT::f64, Legal);
384 
385     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
386     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
387     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
388     setOperationAction(ISD::FROUND, MVT::f32, Legal);
389   }
390 
391   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
392   // to speed up scalar BSWAP64.
393   // CTPOP or CTTZ were introduced in P8/P9 respectively
394   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
395   if (Subtarget.hasP9Vector())
396     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
397   else
398     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
399   if (Subtarget.isISA3_0()) {
400     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
401     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
402   } else {
403     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
404     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
405   }
406 
407   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
408     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
409     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
410   } else {
411     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
412     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
413   }
414 
415   // PowerPC does not have ROTR
416   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
417   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
418 
419   if (!Subtarget.useCRBits()) {
420     // PowerPC does not have Select
421     setOperationAction(ISD::SELECT, MVT::i32, Expand);
422     setOperationAction(ISD::SELECT, MVT::i64, Expand);
423     setOperationAction(ISD::SELECT, MVT::f32, Expand);
424     setOperationAction(ISD::SELECT, MVT::f64, Expand);
425   }
426 
427   // PowerPC wants to turn select_cc of FP into fsel when possible.
428   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
429   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
430 
431   // PowerPC wants to optimize integer setcc a bit
432   if (!Subtarget.useCRBits())
433     setOperationAction(ISD::SETCC, MVT::i32, Custom);
434 
435   if (Subtarget.hasFPU()) {
436     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
437     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
438     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
439 
440     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
441     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
442     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
443   }
444 
445   // PowerPC does not have BRCOND which requires SetCC
446   if (!Subtarget.useCRBits())
447     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
448 
449   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
450 
451   if (Subtarget.hasSPE()) {
452     // SPE has built-in conversions
453     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
454     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
455     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
456     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
457     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
458     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
459   } else {
460     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
461     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
462     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
463 
464     // PowerPC does not have [U|S]INT_TO_FP
465     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
466     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
467     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
468     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
469   }
470 
471   if (Subtarget.hasDirectMove() && isPPC64) {
472     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
473     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
474     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
475     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
476     if (TM.Options.UnsafeFPMath) {
477       setOperationAction(ISD::LRINT, MVT::f64, Legal);
478       setOperationAction(ISD::LRINT, MVT::f32, Legal);
479       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
480       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
481       setOperationAction(ISD::LROUND, MVT::f64, Legal);
482       setOperationAction(ISD::LROUND, MVT::f32, Legal);
483       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
484       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
485     }
486   } else {
487     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
488     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
489     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
490     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
491   }
492 
493   // We cannot sextinreg(i1).  Expand to shifts.
494   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
495 
496   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
497   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
498   // support continuation, user-level threading, and etc.. As a result, no
499   // other SjLj exception interfaces are implemented and please don't build
500   // your own exception handling based on them.
501   // LLVM/Clang supports zero-cost DWARF exception handling.
502   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
503   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
504 
505   // We want to legalize GlobalAddress and ConstantPool nodes into the
506   // appropriate instructions to materialize the address.
507   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
508   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
509   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
510   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
511   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
512   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
513   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
514   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
515   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
516   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
517 
518   // TRAP is legal.
519   setOperationAction(ISD::TRAP, MVT::Other, Legal);
520 
521   // TRAMPOLINE is custom lowered.
522   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
523   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
524 
525   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
526   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
527 
528   if (Subtarget.is64BitELFABI()) {
529     // VAARG always uses double-word chunks, so promote anything smaller.
530     setOperationAction(ISD::VAARG, MVT::i1, Promote);
531     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
532     setOperationAction(ISD::VAARG, MVT::i8, Promote);
533     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
534     setOperationAction(ISD::VAARG, MVT::i16, Promote);
535     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
536     setOperationAction(ISD::VAARG, MVT::i32, Promote);
537     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
538     setOperationAction(ISD::VAARG, MVT::Other, Expand);
539   } else if (Subtarget.is32BitELFABI()) {
540     // VAARG is custom lowered with the 32-bit SVR4 ABI.
541     setOperationAction(ISD::VAARG, MVT::Other, Custom);
542     setOperationAction(ISD::VAARG, MVT::i64, Custom);
543   } else
544     setOperationAction(ISD::VAARG, MVT::Other, Expand);
545 
546   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
547   if (Subtarget.is32BitELFABI())
548     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
549   else
550     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
551 
552   // Use the default implementation.
553   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
554   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
555   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
556   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
557   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
558   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
559   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
560   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
561   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
562 
563   // We want to custom lower some of our intrinsics.
564   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
565 
566   // To handle counter-based loop conditions.
567   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
568 
569   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
570   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
571   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
572   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
573 
574   // Comparisons that require checking two conditions.
575   if (Subtarget.hasSPE()) {
576     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
577     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
578     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
579     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
580   }
581   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
582   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
583   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
584   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
585   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
586   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
587   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
588   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
589   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
590   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
591   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
592   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
593 
594   if (Subtarget.has64BitSupport()) {
595     // They also have instructions for converting between i64 and fp.
596     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
597     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
598     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
599     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
600     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
601     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
602     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
603     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
604     // This is just the low 32 bits of a (signed) fp->i64 conversion.
605     // We cannot do this with Promote because i64 is not a legal type.
606     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
607     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
608 
609     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
610       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
611       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
612     }
613   } else {
614     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
615     if (Subtarget.hasSPE()) {
616       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
617       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
618     } else {
619       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
620       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
621     }
622   }
623 
624   // With the instructions enabled under FPCVT, we can do everything.
625   if (Subtarget.hasFPCVT()) {
626     if (Subtarget.has64BitSupport()) {
627       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
628       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
629       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
630       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
631       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
632       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
633       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
634       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
635     }
636 
637     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
638     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
639     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
640     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
641     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
642     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
643     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
644     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
645   }
646 
647   if (Subtarget.use64BitRegs()) {
648     // 64-bit PowerPC implementations can support i64 types directly
649     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
650     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
651     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
652     // 64-bit PowerPC wants to expand i128 shifts itself.
653     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
654     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
655     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
656   } else {
657     // 32-bit PowerPC wants to expand i64 shifts itself.
658     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
659     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
660     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
661   }
662 
663   // PowerPC has better expansions for funnel shifts than the generic
664   // TargetLowering::expandFunnelShift.
665   if (Subtarget.has64BitSupport()) {
666     setOperationAction(ISD::FSHL, MVT::i64, Custom);
667     setOperationAction(ISD::FSHR, MVT::i64, Custom);
668   }
669   setOperationAction(ISD::FSHL, MVT::i32, Custom);
670   setOperationAction(ISD::FSHR, MVT::i32, Custom);
671 
672   if (Subtarget.hasVSX()) {
673     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
674     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
675     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
676     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
677   }
678 
679   if (Subtarget.hasAltivec()) {
680     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
681       setOperationAction(ISD::SADDSAT, VT, Legal);
682       setOperationAction(ISD::SSUBSAT, VT, Legal);
683       setOperationAction(ISD::UADDSAT, VT, Legal);
684       setOperationAction(ISD::USUBSAT, VT, Legal);
685     }
686     // First set operation action for all vector types to expand. Then we
687     // will selectively turn on ones that can be effectively codegen'd.
688     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
689       // add/sub are legal for all supported vector VT's.
690       setOperationAction(ISD::ADD, VT, Legal);
691       setOperationAction(ISD::SUB, VT, Legal);
692 
693       // For v2i64, these are only valid with P8Vector. This is corrected after
694       // the loop.
695       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
696         setOperationAction(ISD::SMAX, VT, Legal);
697         setOperationAction(ISD::SMIN, VT, Legal);
698         setOperationAction(ISD::UMAX, VT, Legal);
699         setOperationAction(ISD::UMIN, VT, Legal);
700       }
701       else {
702         setOperationAction(ISD::SMAX, VT, Expand);
703         setOperationAction(ISD::SMIN, VT, Expand);
704         setOperationAction(ISD::UMAX, VT, Expand);
705         setOperationAction(ISD::UMIN, VT, Expand);
706       }
707 
708       if (Subtarget.hasVSX()) {
709         setOperationAction(ISD::FMAXNUM, VT, Legal);
710         setOperationAction(ISD::FMINNUM, VT, Legal);
711       }
712 
713       // Vector instructions introduced in P8
714       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
715         setOperationAction(ISD::CTPOP, VT, Legal);
716         setOperationAction(ISD::CTLZ, VT, Legal);
717       }
718       else {
719         setOperationAction(ISD::CTPOP, VT, Expand);
720         setOperationAction(ISD::CTLZ, VT, Expand);
721       }
722 
723       // Vector instructions introduced in P9
724       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
725         setOperationAction(ISD::CTTZ, VT, Legal);
726       else
727         setOperationAction(ISD::CTTZ, VT, Expand);
728 
729       // We promote all shuffles to v16i8.
730       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
731       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
732 
733       // We promote all non-typed operations to v4i32.
734       setOperationAction(ISD::AND   , VT, Promote);
735       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
736       setOperationAction(ISD::OR    , VT, Promote);
737       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
738       setOperationAction(ISD::XOR   , VT, Promote);
739       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
740       setOperationAction(ISD::LOAD  , VT, Promote);
741       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
742       setOperationAction(ISD::SELECT, VT, Promote);
743       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
744       setOperationAction(ISD::VSELECT, VT, Legal);
745       setOperationAction(ISD::SELECT_CC, VT, Promote);
746       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
747       setOperationAction(ISD::STORE, VT, Promote);
748       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
749 
750       // No other operations are legal.
751       setOperationAction(ISD::MUL , VT, Expand);
752       setOperationAction(ISD::SDIV, VT, Expand);
753       setOperationAction(ISD::SREM, VT, Expand);
754       setOperationAction(ISD::UDIV, VT, Expand);
755       setOperationAction(ISD::UREM, VT, Expand);
756       setOperationAction(ISD::FDIV, VT, Expand);
757       setOperationAction(ISD::FREM, VT, Expand);
758       setOperationAction(ISD::FNEG, VT, Expand);
759       setOperationAction(ISD::FSQRT, VT, Expand);
760       setOperationAction(ISD::FLOG, VT, Expand);
761       setOperationAction(ISD::FLOG10, VT, Expand);
762       setOperationAction(ISD::FLOG2, VT, Expand);
763       setOperationAction(ISD::FEXP, VT, Expand);
764       setOperationAction(ISD::FEXP2, VT, Expand);
765       setOperationAction(ISD::FSIN, VT, Expand);
766       setOperationAction(ISD::FCOS, VT, Expand);
767       setOperationAction(ISD::FABS, VT, Expand);
768       setOperationAction(ISD::FFLOOR, VT, Expand);
769       setOperationAction(ISD::FCEIL,  VT, Expand);
770       setOperationAction(ISD::FTRUNC, VT, Expand);
771       setOperationAction(ISD::FRINT,  VT, Expand);
772       setOperationAction(ISD::FNEARBYINT, VT, Expand);
773       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
774       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
775       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
776       setOperationAction(ISD::MULHU, VT, Expand);
777       setOperationAction(ISD::MULHS, VT, Expand);
778       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
779       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
780       setOperationAction(ISD::UDIVREM, VT, Expand);
781       setOperationAction(ISD::SDIVREM, VT, Expand);
782       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
783       setOperationAction(ISD::FPOW, VT, Expand);
784       setOperationAction(ISD::BSWAP, VT, Expand);
785       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
786       setOperationAction(ISD::ROTL, VT, Expand);
787       setOperationAction(ISD::ROTR, VT, Expand);
788 
789       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
790         setTruncStoreAction(VT, InnerVT, Expand);
791         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
792         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
793         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
794       }
795     }
796     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
797     if (!Subtarget.hasP8Vector()) {
798       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
799       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
800       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
801       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
802     }
803 
804     for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
805       setOperationAction(ISD::ABS, VT, Custom);
806 
807     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
808     // with merges, splats, etc.
809     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
810 
811     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
812     // are cheap, so handle them before they get expanded to scalar.
813     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
814     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
815     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
816     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
817     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
818 
819     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
820     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
821     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
822     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
823     setOperationAction(ISD::SELECT, MVT::v4i32,
824                        Subtarget.useCRBits() ? Legal : Expand);
825     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
826     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
827     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
828     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
829     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
830     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
831     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
832     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
833     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
834     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
835     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
836     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
837     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
838 
839     // Without hasP8Altivec set, v2i64 SMAX isn't available.
840     // But ABS custom lowering requires SMAX support.
841     if (!Subtarget.hasP8Altivec())
842       setOperationAction(ISD::ABS, MVT::v2i64, Expand);
843 
844     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
845     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
846     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
847     if (Subtarget.hasAltivec())
848       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
849         setOperationAction(ISD::ROTL, VT, Legal);
850     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
851     if (Subtarget.hasP8Altivec())
852       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
853 
854     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
855     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
856     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
857     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
858 
859     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
860     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
861 
862     if (Subtarget.hasVSX()) {
863       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
864       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
865     }
866 
867     if (Subtarget.hasP8Altivec())
868       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
869     else
870       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
871 
872     if (Subtarget.isISA3_1()) {
873       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
874       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
875       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
876       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
877       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
878       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
879       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
880       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
881       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
882       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
883       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
884       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
885       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
886       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
887       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
888       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
889       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
890       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
891     }
892 
893     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
894     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
895 
896     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
897     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
898 
899     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
900     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
901     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
902     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
903 
904     // Altivec does not contain unordered floating-point compare instructions
905     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
906     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
907     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
908     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
909 
910     if (Subtarget.hasVSX()) {
911       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
912       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
913       if (Subtarget.hasP8Vector()) {
914         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
915         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
916       }
917       if (Subtarget.hasDirectMove() && isPPC64) {
918         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
919         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
920         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
921         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
922         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
923         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
924         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
925         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
926       }
927       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
928 
929       // The nearbyint variants are not allowed to raise the inexact exception
930       // so we can only code-gen them with unsafe math.
931       if (TM.Options.UnsafeFPMath) {
932         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
933         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
934       }
935 
936       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
937       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
938       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
939       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
940       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
941       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
942       setOperationAction(ISD::FROUND, MVT::f64, Legal);
943       setOperationAction(ISD::FRINT, MVT::f64, Legal);
944 
945       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
946       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
947       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
948       setOperationAction(ISD::FROUND, MVT::f32, Legal);
949       setOperationAction(ISD::FRINT, MVT::f32, Legal);
950 
951       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
952       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
953 
954       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
955       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
956 
957       // Share the Altivec comparison restrictions.
958       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
959       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
960       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
961       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
962 
963       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
964       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
965 
966       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
967 
968       if (Subtarget.hasP8Vector())
969         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
970 
971       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
972 
973       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
974       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
975       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
976 
977       if (Subtarget.hasP8Altivec()) {
978         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
979         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
980         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
981 
982         // 128 bit shifts can be accomplished via 3 instructions for SHL and
983         // SRL, but not for SRA because of the instructions available:
984         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
985         // doing
986         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
987         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
988         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
989 
990         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
991       }
992       else {
993         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
994         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
995         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
996 
997         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
998 
999         // VSX v2i64 only supports non-arithmetic operations.
1000         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1001         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1002       }
1003 
1004       if (Subtarget.isISA3_1())
1005         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1006       else
1007         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1008 
1009       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1010       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1011       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1012       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1013 
1014       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1015 
1016       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1017       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1018       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1019       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1020       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1021       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1022       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1023       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1024 
1025       // Custom handling for partial vectors of integers converted to
1026       // floating point. We already have optimal handling for v2i32 through
1027       // the DAG combine, so those aren't necessary.
1028       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1029       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1030       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1031       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1032       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1033       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1034       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1035       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1036       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1037       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1038       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1039       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1040       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1041       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1042       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1043       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1044 
1045       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1046       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1047       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1048       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1049       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1050       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1051 
1052       if (Subtarget.hasDirectMove())
1053         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1054       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1055 
1056       // Handle constrained floating-point operations of vector.
1057       // The predictor is `hasVSX` because altivec instruction has
1058       // no exception but VSX vector instruction has.
1059       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1060       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1061       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1062       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1063       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1064       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1065       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1066       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1067       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1068       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1069       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1070       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1071       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1072 
1073       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1074       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1075       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1076       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1077       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1078       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1079       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1080       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1081       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1082       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1083       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1084       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1085       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1086 
1087       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1088     }
1089 
1090     if (Subtarget.hasP8Altivec()) {
1091       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1092       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1093     }
1094 
1095     if (Subtarget.hasP9Vector()) {
1096       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1097       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1098 
1099       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1100       // SRL, but not for SRA because of the instructions available:
1101       // VS{RL} and VS{RL}O.
1102       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1103       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1104       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1105 
1106       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1107       setOperationAction(ISD::FADD, MVT::f128, Legal);
1108       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1109       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1110       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1111       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1112       // No extending loads to f128 on PPC.
1113       for (MVT FPT : MVT::fp_valuetypes())
1114         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1115       setOperationAction(ISD::FMA, MVT::f128, Legal);
1116       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1117       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1118       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1119       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1120       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1121       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1122 
1123       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1124       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1125       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1126       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1127       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1128       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1129 
1130       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1131       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1132       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1133       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1134       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1135       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1136       // No implementation for these ops for PowerPC.
1137       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1138       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1139       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1140       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1141       setOperationAction(ISD::FREM, MVT::f128, Expand);
1142 
1143       // Handle constrained floating-point operations of fp128
1144       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1145       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1146       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1147       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1148       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1149       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1150       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1151       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1152       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1153       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1154       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1155       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1156       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1157       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1158       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1159       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1160       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1161       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1162       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1163       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1164     }
1165 
1166     if (Subtarget.hasP9Altivec()) {
1167       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1168       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1169 
1170       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1171       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1172       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1173       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1174       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1175       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1176       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1177     }
1178   }
1179 
1180   if (Subtarget.pairedVectorMemops()) {
1181     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1182     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1183     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1184   }
1185   if (Subtarget.hasMMA()) {
1186     addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1187     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1188     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1189     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1190   }
1191 
1192   if (Subtarget.has64BitSupport())
1193     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1194 
1195   if (Subtarget.isISA3_1())
1196     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1197 
1198   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1199 
1200   if (!isPPC64) {
1201     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1202     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1203   }
1204 
1205   setBooleanContents(ZeroOrOneBooleanContent);
1206 
1207   if (Subtarget.hasAltivec()) {
1208     // Altivec instructions set fields to all zeros or all ones.
1209     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1210   }
1211 
1212   if (!isPPC64) {
1213     // These libcalls are not available in 32-bit.
1214     setLibcallName(RTLIB::SHL_I128, nullptr);
1215     setLibcallName(RTLIB::SRL_I128, nullptr);
1216     setLibcallName(RTLIB::SRA_I128, nullptr);
1217   }
1218 
1219   if (!isPPC64)
1220     setMaxAtomicSizeInBitsSupported(32);
1221 
1222   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1223 
1224   // We have target-specific dag combine patterns for the following nodes:
1225   setTargetDAGCombine(ISD::ADD);
1226   setTargetDAGCombine(ISD::SHL);
1227   setTargetDAGCombine(ISD::SRA);
1228   setTargetDAGCombine(ISD::SRL);
1229   setTargetDAGCombine(ISD::MUL);
1230   setTargetDAGCombine(ISD::FMA);
1231   setTargetDAGCombine(ISD::SINT_TO_FP);
1232   setTargetDAGCombine(ISD::BUILD_VECTOR);
1233   if (Subtarget.hasFPCVT())
1234     setTargetDAGCombine(ISD::UINT_TO_FP);
1235   setTargetDAGCombine(ISD::LOAD);
1236   setTargetDAGCombine(ISD::STORE);
1237   setTargetDAGCombine(ISD::BR_CC);
1238   if (Subtarget.useCRBits())
1239     setTargetDAGCombine(ISD::BRCOND);
1240   setTargetDAGCombine(ISD::BSWAP);
1241   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1242   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1243   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1244 
1245   setTargetDAGCombine(ISD::SIGN_EXTEND);
1246   setTargetDAGCombine(ISD::ZERO_EXTEND);
1247   setTargetDAGCombine(ISD::ANY_EXTEND);
1248 
1249   setTargetDAGCombine(ISD::TRUNCATE);
1250   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1251 
1252 
1253   if (Subtarget.useCRBits()) {
1254     setTargetDAGCombine(ISD::TRUNCATE);
1255     setTargetDAGCombine(ISD::SETCC);
1256     setTargetDAGCombine(ISD::SELECT_CC);
1257   }
1258 
1259   if (Subtarget.hasP9Altivec()) {
1260     setTargetDAGCombine(ISD::ABS);
1261     setTargetDAGCombine(ISD::VSELECT);
1262   }
1263 
1264   setLibcallName(RTLIB::LOG_F128, "logf128");
1265   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1266   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1267   setLibcallName(RTLIB::EXP_F128, "expf128");
1268   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1269   setLibcallName(RTLIB::SIN_F128, "sinf128");
1270   setLibcallName(RTLIB::COS_F128, "cosf128");
1271   setLibcallName(RTLIB::POW_F128, "powf128");
1272   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1273   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1274   setLibcallName(RTLIB::POWI_F128, "__powikf2");
1275   setLibcallName(RTLIB::REM_F128, "fmodf128");
1276 
1277   // With 32 condition bits, we don't need to sink (and duplicate) compares
1278   // aggressively in CodeGenPrep.
1279   if (Subtarget.useCRBits()) {
1280     setHasMultipleConditionRegisters();
1281     setJumpIsExpensive();
1282   }
1283 
1284   setMinFunctionAlignment(Align(4));
1285 
1286   switch (Subtarget.getCPUDirective()) {
1287   default: break;
1288   case PPC::DIR_970:
1289   case PPC::DIR_A2:
1290   case PPC::DIR_E500:
1291   case PPC::DIR_E500mc:
1292   case PPC::DIR_E5500:
1293   case PPC::DIR_PWR4:
1294   case PPC::DIR_PWR5:
1295   case PPC::DIR_PWR5X:
1296   case PPC::DIR_PWR6:
1297   case PPC::DIR_PWR6X:
1298   case PPC::DIR_PWR7:
1299   case PPC::DIR_PWR8:
1300   case PPC::DIR_PWR9:
1301   case PPC::DIR_PWR10:
1302   case PPC::DIR_PWR_FUTURE:
1303     setPrefLoopAlignment(Align(16));
1304     setPrefFunctionAlignment(Align(16));
1305     break;
1306   }
1307 
1308   if (Subtarget.enableMachineScheduler())
1309     setSchedulingPreference(Sched::Source);
1310   else
1311     setSchedulingPreference(Sched::Hybrid);
1312 
1313   computeRegisterProperties(STI.getRegisterInfo());
1314 
1315   // The Freescale cores do better with aggressive inlining of memcpy and
1316   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1317   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1318       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1319     MaxStoresPerMemset = 32;
1320     MaxStoresPerMemsetOptSize = 16;
1321     MaxStoresPerMemcpy = 32;
1322     MaxStoresPerMemcpyOptSize = 8;
1323     MaxStoresPerMemmove = 32;
1324     MaxStoresPerMemmoveOptSize = 8;
1325   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1326     // The A2 also benefits from (very) aggressive inlining of memcpy and
1327     // friends. The overhead of a the function call, even when warm, can be
1328     // over one hundred cycles.
1329     MaxStoresPerMemset = 128;
1330     MaxStoresPerMemcpy = 128;
1331     MaxStoresPerMemmove = 128;
1332     MaxLoadsPerMemcmp = 128;
1333   } else {
1334     MaxLoadsPerMemcmp = 8;
1335     MaxLoadsPerMemcmpOptSize = 4;
1336   }
1337 
1338   IsStrictFPEnabled = true;
1339 
1340   // Let the subtarget (CPU) decide if a predictable select is more expensive
1341   // than the corresponding branch. This information is used in CGP to decide
1342   // when to convert selects into branches.
1343   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1344 }
1345 
1346 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1347 /// the desired ByVal argument alignment.
1348 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1349   if (MaxAlign == MaxMaxAlign)
1350     return;
1351   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1352     if (MaxMaxAlign >= 32 &&
1353         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1354       MaxAlign = Align(32);
1355     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1356              MaxAlign < 16)
1357       MaxAlign = Align(16);
1358   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1359     Align EltAlign;
1360     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1361     if (EltAlign > MaxAlign)
1362       MaxAlign = EltAlign;
1363   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1364     for (auto *EltTy : STy->elements()) {
1365       Align EltAlign;
1366       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1367       if (EltAlign > MaxAlign)
1368         MaxAlign = EltAlign;
1369       if (MaxAlign == MaxMaxAlign)
1370         break;
1371     }
1372   }
1373 }
1374 
1375 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1376 /// function arguments in the caller parameter area.
1377 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1378                                                   const DataLayout &DL) const {
1379   // 16byte and wider vectors are passed on 16byte boundary.
1380   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1381   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1382   if (Subtarget.hasAltivec())
1383     getMaxByValAlign(Ty, Alignment, Align(16));
1384   return Alignment.value();
1385 }
1386 
1387 bool PPCTargetLowering::useSoftFloat() const {
1388   return Subtarget.useSoftFloat();
1389 }
1390 
1391 bool PPCTargetLowering::hasSPE() const {
1392   return Subtarget.hasSPE();
1393 }
1394 
1395 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1396   return VT.isScalarInteger();
1397 }
1398 
1399 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1400   switch ((PPCISD::NodeType)Opcode) {
1401   case PPCISD::FIRST_NUMBER:    break;
1402   case PPCISD::FSEL:            return "PPCISD::FSEL";
1403   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1404   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1405   case PPCISD::FCFID:           return "PPCISD::FCFID";
1406   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1407   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1408   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1409   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1410   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1411   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1412   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1413   case PPCISD::FP_TO_UINT_IN_VSR:
1414                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1415   case PPCISD::FP_TO_SINT_IN_VSR:
1416                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1417   case PPCISD::FRE:             return "PPCISD::FRE";
1418   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1419   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1420   case PPCISD::VPERM:           return "PPCISD::VPERM";
1421   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1422   case PPCISD::XXSPLTI_SP_TO_DP:
1423     return "PPCISD::XXSPLTI_SP_TO_DP";
1424   case PPCISD::XXSPLTI32DX:
1425     return "PPCISD::XXSPLTI32DX";
1426   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1427   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1428   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1429   case PPCISD::CMPB:            return "PPCISD::CMPB";
1430   case PPCISD::Hi:              return "PPCISD::Hi";
1431   case PPCISD::Lo:              return "PPCISD::Lo";
1432   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1433   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1434   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1435   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1436   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1437   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1438   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1439   case PPCISD::SRL:             return "PPCISD::SRL";
1440   case PPCISD::SRA:             return "PPCISD::SRA";
1441   case PPCISD::SHL:             return "PPCISD::SHL";
1442   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1443   case PPCISD::CALL:            return "PPCISD::CALL";
1444   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1445   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1446   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1447   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1448   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1449   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1450   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1451   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1452   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1453   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1454   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1455   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1456   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1457   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1458   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1459   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1460     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1461   case PPCISD::ANDI_rec_1_EQ_BIT:
1462     return "PPCISD::ANDI_rec_1_EQ_BIT";
1463   case PPCISD::ANDI_rec_1_GT_BIT:
1464     return "PPCISD::ANDI_rec_1_GT_BIT";
1465   case PPCISD::VCMP:            return "PPCISD::VCMP";
1466   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1467   case PPCISD::LBRX:            return "PPCISD::LBRX";
1468   case PPCISD::STBRX:           return "PPCISD::STBRX";
1469   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1470   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1471   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1472   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1473   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1474   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1475   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1476   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1477   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1478   case PPCISD::ST_VSR_SCAL_INT:
1479                                 return "PPCISD::ST_VSR_SCAL_INT";
1480   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1481   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1482   case PPCISD::BDZ:             return "PPCISD::BDZ";
1483   case PPCISD::MFFS:            return "PPCISD::MFFS";
1484   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1485   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1486   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1487   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1488   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1489   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1490   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1491   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1492   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1493   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1494   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1495   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1496   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1497   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1498   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1499   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1500   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1501   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1502   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1503   case PPCISD::PADDI_DTPREL:
1504     return "PPCISD::PADDI_DTPREL";
1505   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1506   case PPCISD::SC:              return "PPCISD::SC";
1507   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1508   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1509   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1510   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1511   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1512   case PPCISD::VABSD:           return "PPCISD::VABSD";
1513   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1514   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1515   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1516   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1517   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1518   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1519   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1520   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1521     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1522   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1523     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1524   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1525   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1526   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1527   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1528   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1529   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1530   case PPCISD::STRICT_FADDRTZ:
1531     return "PPCISD::STRICT_FADDRTZ";
1532   case PPCISD::STRICT_FCTIDZ:
1533     return "PPCISD::STRICT_FCTIDZ";
1534   case PPCISD::STRICT_FCTIWZ:
1535     return "PPCISD::STRICT_FCTIWZ";
1536   case PPCISD::STRICT_FCTIDUZ:
1537     return "PPCISD::STRICT_FCTIDUZ";
1538   case PPCISD::STRICT_FCTIWUZ:
1539     return "PPCISD::STRICT_FCTIWUZ";
1540   case PPCISD::STRICT_FCFID:
1541     return "PPCISD::STRICT_FCFID";
1542   case PPCISD::STRICT_FCFIDU:
1543     return "PPCISD::STRICT_FCFIDU";
1544   case PPCISD::STRICT_FCFIDS:
1545     return "PPCISD::STRICT_FCFIDS";
1546   case PPCISD::STRICT_FCFIDUS:
1547     return "PPCISD::STRICT_FCFIDUS";
1548   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1549   }
1550   return nullptr;
1551 }
1552 
1553 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1554                                           EVT VT) const {
1555   if (!VT.isVector())
1556     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1557 
1558   return VT.changeVectorElementTypeToInteger();
1559 }
1560 
1561 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1562   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1563   return true;
1564 }
1565 
1566 //===----------------------------------------------------------------------===//
1567 // Node matching predicates, for use by the tblgen matching code.
1568 //===----------------------------------------------------------------------===//
1569 
1570 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1571 static bool isFloatingPointZero(SDValue Op) {
1572   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1573     return CFP->getValueAPF().isZero();
1574   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1575     // Maybe this has already been legalized into the constant pool?
1576     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1577       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1578         return CFP->getValueAPF().isZero();
1579   }
1580   return false;
1581 }
1582 
1583 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1584 /// true if Op is undef or if it matches the specified value.
1585 static bool isConstantOrUndef(int Op, int Val) {
1586   return Op < 0 || Op == Val;
1587 }
1588 
1589 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1590 /// VPKUHUM instruction.
1591 /// The ShuffleKind distinguishes between big-endian operations with
1592 /// two different inputs (0), either-endian operations with two identical
1593 /// inputs (1), and little-endian operations with two different inputs (2).
1594 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1595 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1596                                SelectionDAG &DAG) {
1597   bool IsLE = DAG.getDataLayout().isLittleEndian();
1598   if (ShuffleKind == 0) {
1599     if (IsLE)
1600       return false;
1601     for (unsigned i = 0; i != 16; ++i)
1602       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1603         return false;
1604   } else if (ShuffleKind == 2) {
1605     if (!IsLE)
1606       return false;
1607     for (unsigned i = 0; i != 16; ++i)
1608       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1609         return false;
1610   } else if (ShuffleKind == 1) {
1611     unsigned j = IsLE ? 0 : 1;
1612     for (unsigned i = 0; i != 8; ++i)
1613       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1614           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1615         return false;
1616   }
1617   return true;
1618 }
1619 
1620 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1621 /// VPKUWUM instruction.
1622 /// The ShuffleKind distinguishes between big-endian operations with
1623 /// two different inputs (0), either-endian operations with two identical
1624 /// inputs (1), and little-endian operations with two different inputs (2).
1625 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1626 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1627                                SelectionDAG &DAG) {
1628   bool IsLE = DAG.getDataLayout().isLittleEndian();
1629   if (ShuffleKind == 0) {
1630     if (IsLE)
1631       return false;
1632     for (unsigned i = 0; i != 16; i += 2)
1633       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1634           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1635         return false;
1636   } else if (ShuffleKind == 2) {
1637     if (!IsLE)
1638       return false;
1639     for (unsigned i = 0; i != 16; i += 2)
1640       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1641           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1642         return false;
1643   } else if (ShuffleKind == 1) {
1644     unsigned j = IsLE ? 0 : 2;
1645     for (unsigned i = 0; i != 8; i += 2)
1646       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1647           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1648           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1649           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1650         return false;
1651   }
1652   return true;
1653 }
1654 
1655 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1656 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1657 /// current subtarget.
1658 ///
1659 /// The ShuffleKind distinguishes between big-endian operations with
1660 /// two different inputs (0), either-endian operations with two identical
1661 /// inputs (1), and little-endian operations with two different inputs (2).
1662 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1663 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1664                                SelectionDAG &DAG) {
1665   const PPCSubtarget& Subtarget =
1666       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1667   if (!Subtarget.hasP8Vector())
1668     return false;
1669 
1670   bool IsLE = DAG.getDataLayout().isLittleEndian();
1671   if (ShuffleKind == 0) {
1672     if (IsLE)
1673       return false;
1674     for (unsigned i = 0; i != 16; i += 4)
1675       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1676           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1677           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1678           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1679         return false;
1680   } else if (ShuffleKind == 2) {
1681     if (!IsLE)
1682       return false;
1683     for (unsigned i = 0; i != 16; i += 4)
1684       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1685           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1686           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1687           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1688         return false;
1689   } else if (ShuffleKind == 1) {
1690     unsigned j = IsLE ? 0 : 4;
1691     for (unsigned i = 0; i != 8; i += 4)
1692       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1693           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1694           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1695           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1696           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1697           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1698           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1699           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1700         return false;
1701   }
1702   return true;
1703 }
1704 
1705 /// isVMerge - Common function, used to match vmrg* shuffles.
1706 ///
1707 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1708                      unsigned LHSStart, unsigned RHSStart) {
1709   if (N->getValueType(0) != MVT::v16i8)
1710     return false;
1711   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1712          "Unsupported merge size!");
1713 
1714   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1715     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1716       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1717                              LHSStart+j+i*UnitSize) ||
1718           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1719                              RHSStart+j+i*UnitSize))
1720         return false;
1721     }
1722   return true;
1723 }
1724 
1725 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1726 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1727 /// The ShuffleKind distinguishes between big-endian merges with two
1728 /// different inputs (0), either-endian merges with two identical inputs (1),
1729 /// and little-endian merges with two different inputs (2).  For the latter,
1730 /// the input operands are swapped (see PPCInstrAltivec.td).
1731 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1732                              unsigned ShuffleKind, SelectionDAG &DAG) {
1733   if (DAG.getDataLayout().isLittleEndian()) {
1734     if (ShuffleKind == 1) // unary
1735       return isVMerge(N, UnitSize, 0, 0);
1736     else if (ShuffleKind == 2) // swapped
1737       return isVMerge(N, UnitSize, 0, 16);
1738     else
1739       return false;
1740   } else {
1741     if (ShuffleKind == 1) // unary
1742       return isVMerge(N, UnitSize, 8, 8);
1743     else if (ShuffleKind == 0) // normal
1744       return isVMerge(N, UnitSize, 8, 24);
1745     else
1746       return false;
1747   }
1748 }
1749 
1750 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1751 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1752 /// The ShuffleKind distinguishes between big-endian merges with two
1753 /// different inputs (0), either-endian merges with two identical inputs (1),
1754 /// and little-endian merges with two different inputs (2).  For the latter,
1755 /// the input operands are swapped (see PPCInstrAltivec.td).
1756 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1757                              unsigned ShuffleKind, SelectionDAG &DAG) {
1758   if (DAG.getDataLayout().isLittleEndian()) {
1759     if (ShuffleKind == 1) // unary
1760       return isVMerge(N, UnitSize, 8, 8);
1761     else if (ShuffleKind == 2) // swapped
1762       return isVMerge(N, UnitSize, 8, 24);
1763     else
1764       return false;
1765   } else {
1766     if (ShuffleKind == 1) // unary
1767       return isVMerge(N, UnitSize, 0, 0);
1768     else if (ShuffleKind == 0) // normal
1769       return isVMerge(N, UnitSize, 0, 16);
1770     else
1771       return false;
1772   }
1773 }
1774 
1775 /**
1776  * Common function used to match vmrgew and vmrgow shuffles
1777  *
1778  * The indexOffset determines whether to look for even or odd words in
1779  * the shuffle mask. This is based on the of the endianness of the target
1780  * machine.
1781  *   - Little Endian:
1782  *     - Use offset of 0 to check for odd elements
1783  *     - Use offset of 4 to check for even elements
1784  *   - Big Endian:
1785  *     - Use offset of 0 to check for even elements
1786  *     - Use offset of 4 to check for odd elements
1787  * A detailed description of the vector element ordering for little endian and
1788  * big endian can be found at
1789  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1790  * Targeting your applications - what little endian and big endian IBM XL C/C++
1791  * compiler differences mean to you
1792  *
1793  * The mask to the shuffle vector instruction specifies the indices of the
1794  * elements from the two input vectors to place in the result. The elements are
1795  * numbered in array-access order, starting with the first vector. These vectors
1796  * are always of type v16i8, thus each vector will contain 16 elements of size
1797  * 8. More info on the shuffle vector can be found in the
1798  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1799  * Language Reference.
1800  *
1801  * The RHSStartValue indicates whether the same input vectors are used (unary)
1802  * or two different input vectors are used, based on the following:
1803  *   - If the instruction uses the same vector for both inputs, the range of the
1804  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1805  *     be 0.
1806  *   - If the instruction has two different vectors then the range of the
1807  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1808  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1809  *     to 31 specify elements in the second vector).
1810  *
1811  * \param[in] N The shuffle vector SD Node to analyze
1812  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1813  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1814  * vector to the shuffle_vector instruction
1815  * \return true iff this shuffle vector represents an even or odd word merge
1816  */
1817 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1818                      unsigned RHSStartValue) {
1819   if (N->getValueType(0) != MVT::v16i8)
1820     return false;
1821 
1822   for (unsigned i = 0; i < 2; ++i)
1823     for (unsigned j = 0; j < 4; ++j)
1824       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1825                              i*RHSStartValue+j+IndexOffset) ||
1826           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1827                              i*RHSStartValue+j+IndexOffset+8))
1828         return false;
1829   return true;
1830 }
1831 
1832 /**
1833  * Determine if the specified shuffle mask is suitable for the vmrgew or
1834  * vmrgow instructions.
1835  *
1836  * \param[in] N The shuffle vector SD Node to analyze
1837  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1838  * \param[in] ShuffleKind Identify the type of merge:
1839  *   - 0 = big-endian merge with two different inputs;
1840  *   - 1 = either-endian merge with two identical inputs;
1841  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1842  *     little-endian merges).
1843  * \param[in] DAG The current SelectionDAG
1844  * \return true iff this shuffle mask
1845  */
1846 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1847                               unsigned ShuffleKind, SelectionDAG &DAG) {
1848   if (DAG.getDataLayout().isLittleEndian()) {
1849     unsigned indexOffset = CheckEven ? 4 : 0;
1850     if (ShuffleKind == 1) // Unary
1851       return isVMerge(N, indexOffset, 0);
1852     else if (ShuffleKind == 2) // swapped
1853       return isVMerge(N, indexOffset, 16);
1854     else
1855       return false;
1856   }
1857   else {
1858     unsigned indexOffset = CheckEven ? 0 : 4;
1859     if (ShuffleKind == 1) // Unary
1860       return isVMerge(N, indexOffset, 0);
1861     else if (ShuffleKind == 0) // Normal
1862       return isVMerge(N, indexOffset, 16);
1863     else
1864       return false;
1865   }
1866   return false;
1867 }
1868 
1869 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1870 /// amount, otherwise return -1.
1871 /// The ShuffleKind distinguishes between big-endian operations with two
1872 /// different inputs (0), either-endian operations with two identical inputs
1873 /// (1), and little-endian operations with two different inputs (2).  For the
1874 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1875 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1876                              SelectionDAG &DAG) {
1877   if (N->getValueType(0) != MVT::v16i8)
1878     return -1;
1879 
1880   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1881 
1882   // Find the first non-undef value in the shuffle mask.
1883   unsigned i;
1884   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1885     /*search*/;
1886 
1887   if (i == 16) return -1;  // all undef.
1888 
1889   // Otherwise, check to see if the rest of the elements are consecutively
1890   // numbered from this value.
1891   unsigned ShiftAmt = SVOp->getMaskElt(i);
1892   if (ShiftAmt < i) return -1;
1893 
1894   ShiftAmt -= i;
1895   bool isLE = DAG.getDataLayout().isLittleEndian();
1896 
1897   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1898     // Check the rest of the elements to see if they are consecutive.
1899     for (++i; i != 16; ++i)
1900       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1901         return -1;
1902   } else if (ShuffleKind == 1) {
1903     // Check the rest of the elements to see if they are consecutive.
1904     for (++i; i != 16; ++i)
1905       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1906         return -1;
1907   } else
1908     return -1;
1909 
1910   if (isLE)
1911     ShiftAmt = 16 - ShiftAmt;
1912 
1913   return ShiftAmt;
1914 }
1915 
1916 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1917 /// specifies a splat of a single element that is suitable for input to
1918 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1919 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1920   assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1921          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1922 
1923   // The consecutive indices need to specify an element, not part of two
1924   // different elements.  So abandon ship early if this isn't the case.
1925   if (N->getMaskElt(0) % EltSize != 0)
1926     return false;
1927 
1928   // This is a splat operation if each element of the permute is the same, and
1929   // if the value doesn't reference the second vector.
1930   unsigned ElementBase = N->getMaskElt(0);
1931 
1932   // FIXME: Handle UNDEF elements too!
1933   if (ElementBase >= 16)
1934     return false;
1935 
1936   // Check that the indices are consecutive, in the case of a multi-byte element
1937   // splatted with a v16i8 mask.
1938   for (unsigned i = 1; i != EltSize; ++i)
1939     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1940       return false;
1941 
1942   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1943     if (N->getMaskElt(i) < 0) continue;
1944     for (unsigned j = 0; j != EltSize; ++j)
1945       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1946         return false;
1947   }
1948   return true;
1949 }
1950 
1951 /// Check that the mask is shuffling N byte elements. Within each N byte
1952 /// element of the mask, the indices could be either in increasing or
1953 /// decreasing order as long as they are consecutive.
1954 /// \param[in] N the shuffle vector SD Node to analyze
1955 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1956 /// Word/DoubleWord/QuadWord).
1957 /// \param[in] StepLen the delta indices number among the N byte element, if
1958 /// the mask is in increasing/decreasing order then it is 1/-1.
1959 /// \return true iff the mask is shuffling N byte elements.
1960 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1961                                    int StepLen) {
1962   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1963          "Unexpected element width.");
1964   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1965 
1966   unsigned NumOfElem = 16 / Width;
1967   unsigned MaskVal[16]; //  Width is never greater than 16
1968   for (unsigned i = 0; i < NumOfElem; ++i) {
1969     MaskVal[0] = N->getMaskElt(i * Width);
1970     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1971       return false;
1972     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1973       return false;
1974     }
1975 
1976     for (unsigned int j = 1; j < Width; ++j) {
1977       MaskVal[j] = N->getMaskElt(i * Width + j);
1978       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1979         return false;
1980       }
1981     }
1982   }
1983 
1984   return true;
1985 }
1986 
1987 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
1988                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
1989   if (!isNByteElemShuffleMask(N, 4, 1))
1990     return false;
1991 
1992   // Now we look at mask elements 0,4,8,12
1993   unsigned M0 = N->getMaskElt(0) / 4;
1994   unsigned M1 = N->getMaskElt(4) / 4;
1995   unsigned M2 = N->getMaskElt(8) / 4;
1996   unsigned M3 = N->getMaskElt(12) / 4;
1997   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
1998   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
1999 
2000   // Below, let H and L be arbitrary elements of the shuffle mask
2001   // where H is in the range [4,7] and L is in the range [0,3].
2002   // H, 1, 2, 3 or L, 5, 6, 7
2003   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2004       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2005     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2006     InsertAtByte = IsLE ? 12 : 0;
2007     Swap = M0 < 4;
2008     return true;
2009   }
2010   // 0, H, 2, 3 or 4, L, 6, 7
2011   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2012       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2013     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2014     InsertAtByte = IsLE ? 8 : 4;
2015     Swap = M1 < 4;
2016     return true;
2017   }
2018   // 0, 1, H, 3 or 4, 5, L, 7
2019   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2020       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2021     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2022     InsertAtByte = IsLE ? 4 : 8;
2023     Swap = M2 < 4;
2024     return true;
2025   }
2026   // 0, 1, 2, H or 4, 5, 6, L
2027   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2028       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2029     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2030     InsertAtByte = IsLE ? 0 : 12;
2031     Swap = M3 < 4;
2032     return true;
2033   }
2034 
2035   // If both vector operands for the shuffle are the same vector, the mask will
2036   // contain only elements from the first one and the second one will be undef.
2037   if (N->getOperand(1).isUndef()) {
2038     ShiftElts = 0;
2039     Swap = true;
2040     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2041     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2042       InsertAtByte = IsLE ? 12 : 0;
2043       return true;
2044     }
2045     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2046       InsertAtByte = IsLE ? 8 : 4;
2047       return true;
2048     }
2049     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2050       InsertAtByte = IsLE ? 4 : 8;
2051       return true;
2052     }
2053     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2054       InsertAtByte = IsLE ? 0 : 12;
2055       return true;
2056     }
2057   }
2058 
2059   return false;
2060 }
2061 
2062 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2063                                bool &Swap, bool IsLE) {
2064   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2065   // Ensure each byte index of the word is consecutive.
2066   if (!isNByteElemShuffleMask(N, 4, 1))
2067     return false;
2068 
2069   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2070   unsigned M0 = N->getMaskElt(0) / 4;
2071   unsigned M1 = N->getMaskElt(4) / 4;
2072   unsigned M2 = N->getMaskElt(8) / 4;
2073   unsigned M3 = N->getMaskElt(12) / 4;
2074 
2075   // If both vector operands for the shuffle are the same vector, the mask will
2076   // contain only elements from the first one and the second one will be undef.
2077   if (N->getOperand(1).isUndef()) {
2078     assert(M0 < 4 && "Indexing into an undef vector?");
2079     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2080       return false;
2081 
2082     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2083     Swap = false;
2084     return true;
2085   }
2086 
2087   // Ensure each word index of the ShuffleVector Mask is consecutive.
2088   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2089     return false;
2090 
2091   if (IsLE) {
2092     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2093       // Input vectors don't need to be swapped if the leading element
2094       // of the result is one of the 3 left elements of the second vector
2095       // (or if there is no shift to be done at all).
2096       Swap = false;
2097       ShiftElts = (8 - M0) % 8;
2098     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2099       // Input vectors need to be swapped if the leading element
2100       // of the result is one of the 3 left elements of the first vector
2101       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2102       Swap = true;
2103       ShiftElts = (4 - M0) % 4;
2104     }
2105 
2106     return true;
2107   } else {                                          // BE
2108     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2109       // Input vectors don't need to be swapped if the leading element
2110       // of the result is one of the 4 elements of the first vector.
2111       Swap = false;
2112       ShiftElts = M0;
2113     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2114       // Input vectors need to be swapped if the leading element
2115       // of the result is one of the 4 elements of the right vector.
2116       Swap = true;
2117       ShiftElts = M0 - 4;
2118     }
2119 
2120     return true;
2121   }
2122 }
2123 
2124 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2125   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2126 
2127   if (!isNByteElemShuffleMask(N, Width, -1))
2128     return false;
2129 
2130   for (int i = 0; i < 16; i += Width)
2131     if (N->getMaskElt(i) != i + Width - 1)
2132       return false;
2133 
2134   return true;
2135 }
2136 
2137 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2138   return isXXBRShuffleMaskHelper(N, 2);
2139 }
2140 
2141 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2142   return isXXBRShuffleMaskHelper(N, 4);
2143 }
2144 
2145 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2146   return isXXBRShuffleMaskHelper(N, 8);
2147 }
2148 
2149 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2150   return isXXBRShuffleMaskHelper(N, 16);
2151 }
2152 
2153 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2154 /// if the inputs to the instruction should be swapped and set \p DM to the
2155 /// value for the immediate.
2156 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2157 /// AND element 0 of the result comes from the first input (LE) or second input
2158 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2159 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2160 /// mask.
2161 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2162                                bool &Swap, bool IsLE) {
2163   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2164 
2165   // Ensure each byte index of the double word is consecutive.
2166   if (!isNByteElemShuffleMask(N, 8, 1))
2167     return false;
2168 
2169   unsigned M0 = N->getMaskElt(0) / 8;
2170   unsigned M1 = N->getMaskElt(8) / 8;
2171   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2172 
2173   // If both vector operands for the shuffle are the same vector, the mask will
2174   // contain only elements from the first one and the second one will be undef.
2175   if (N->getOperand(1).isUndef()) {
2176     if ((M0 | M1) < 2) {
2177       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2178       Swap = false;
2179       return true;
2180     } else
2181       return false;
2182   }
2183 
2184   if (IsLE) {
2185     if (M0 > 1 && M1 < 2) {
2186       Swap = false;
2187     } else if (M0 < 2 && M1 > 1) {
2188       M0 = (M0 + 2) % 4;
2189       M1 = (M1 + 2) % 4;
2190       Swap = true;
2191     } else
2192       return false;
2193 
2194     // Note: if control flow comes here that means Swap is already set above
2195     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2196     return true;
2197   } else { // BE
2198     if (M0 < 2 && M1 > 1) {
2199       Swap = false;
2200     } else if (M0 > 1 && M1 < 2) {
2201       M0 = (M0 + 2) % 4;
2202       M1 = (M1 + 2) % 4;
2203       Swap = true;
2204     } else
2205       return false;
2206 
2207     // Note: if control flow comes here that means Swap is already set above
2208     DM = (M0 << 1) + (M1 & 1);
2209     return true;
2210   }
2211 }
2212 
2213 
2214 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2215 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2216 /// elements are counted from the left of the vector register).
2217 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2218                                          SelectionDAG &DAG) {
2219   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2220   assert(isSplatShuffleMask(SVOp, EltSize));
2221   if (DAG.getDataLayout().isLittleEndian())
2222     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2223   else
2224     return SVOp->getMaskElt(0) / EltSize;
2225 }
2226 
2227 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2228 /// by using a vspltis[bhw] instruction of the specified element size, return
2229 /// the constant being splatted.  The ByteSize field indicates the number of
2230 /// bytes of each element [124] -> [bhw].
2231 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2232   SDValue OpVal(nullptr, 0);
2233 
2234   // If ByteSize of the splat is bigger than the element size of the
2235   // build_vector, then we have a case where we are checking for a splat where
2236   // multiple elements of the buildvector are folded together into a single
2237   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2238   unsigned EltSize = 16/N->getNumOperands();
2239   if (EltSize < ByteSize) {
2240     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2241     SDValue UniquedVals[4];
2242     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2243 
2244     // See if all of the elements in the buildvector agree across.
2245     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2246       if (N->getOperand(i).isUndef()) continue;
2247       // If the element isn't a constant, bail fully out.
2248       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2249 
2250       if (!UniquedVals[i&(Multiple-1)].getNode())
2251         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2252       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2253         return SDValue();  // no match.
2254     }
2255 
2256     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2257     // either constant or undef values that are identical for each chunk.  See
2258     // if these chunks can form into a larger vspltis*.
2259 
2260     // Check to see if all of the leading entries are either 0 or -1.  If
2261     // neither, then this won't fit into the immediate field.
2262     bool LeadingZero = true;
2263     bool LeadingOnes = true;
2264     for (unsigned i = 0; i != Multiple-1; ++i) {
2265       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2266 
2267       LeadingZero &= isNullConstant(UniquedVals[i]);
2268       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2269     }
2270     // Finally, check the least significant entry.
2271     if (LeadingZero) {
2272       if (!UniquedVals[Multiple-1].getNode())
2273         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2274       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2275       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2276         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2277     }
2278     if (LeadingOnes) {
2279       if (!UniquedVals[Multiple-1].getNode())
2280         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2281       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2282       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2283         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2284     }
2285 
2286     return SDValue();
2287   }
2288 
2289   // Check to see if this buildvec has a single non-undef value in its elements.
2290   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2291     if (N->getOperand(i).isUndef()) continue;
2292     if (!OpVal.getNode())
2293       OpVal = N->getOperand(i);
2294     else if (OpVal != N->getOperand(i))
2295       return SDValue();
2296   }
2297 
2298   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2299 
2300   unsigned ValSizeInBytes = EltSize;
2301   uint64_t Value = 0;
2302   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2303     Value = CN->getZExtValue();
2304   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2305     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2306     Value = FloatToBits(CN->getValueAPF().convertToFloat());
2307   }
2308 
2309   // If the splat value is larger than the element value, then we can never do
2310   // this splat.  The only case that we could fit the replicated bits into our
2311   // immediate field for would be zero, and we prefer to use vxor for it.
2312   if (ValSizeInBytes < ByteSize) return SDValue();
2313 
2314   // If the element value is larger than the splat value, check if it consists
2315   // of a repeated bit pattern of size ByteSize.
2316   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2317     return SDValue();
2318 
2319   // Properly sign extend the value.
2320   int MaskVal = SignExtend32(Value, ByteSize * 8);
2321 
2322   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2323   if (MaskVal == 0) return SDValue();
2324 
2325   // Finally, if this value fits in a 5 bit sext field, return it
2326   if (SignExtend32<5>(MaskVal) == MaskVal)
2327     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2328   return SDValue();
2329 }
2330 
2331 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2332 /// amount, otherwise return -1.
2333 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2334   EVT VT = N->getValueType(0);
2335   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2336     return -1;
2337 
2338   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2339 
2340   // Find the first non-undef value in the shuffle mask.
2341   unsigned i;
2342   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2343     /*search*/;
2344 
2345   if (i == 4) return -1;  // all undef.
2346 
2347   // Otherwise, check to see if the rest of the elements are consecutively
2348   // numbered from this value.
2349   unsigned ShiftAmt = SVOp->getMaskElt(i);
2350   if (ShiftAmt < i) return -1;
2351   ShiftAmt -= i;
2352 
2353   // Check the rest of the elements to see if they are consecutive.
2354   for (++i; i != 4; ++i)
2355     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2356       return -1;
2357 
2358   return ShiftAmt;
2359 }
2360 
2361 //===----------------------------------------------------------------------===//
2362 //  Addressing Mode Selection
2363 //===----------------------------------------------------------------------===//
2364 
2365 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2366 /// or 64-bit immediate, and if the value can be accurately represented as a
2367 /// sign extension from a 16-bit value.  If so, this returns true and the
2368 /// immediate.
2369 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2370   if (!isa<ConstantSDNode>(N))
2371     return false;
2372 
2373   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2374   if (N->getValueType(0) == MVT::i32)
2375     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2376   else
2377     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2378 }
2379 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2380   return isIntS16Immediate(Op.getNode(), Imm);
2381 }
2382 
2383 
2384 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2385 /// be represented as an indexed [r+r] operation.
2386 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2387                                                SDValue &Index,
2388                                                SelectionDAG &DAG) const {
2389   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2390       UI != E; ++UI) {
2391     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2392       if (Memop->getMemoryVT() == MVT::f64) {
2393           Base = N.getOperand(0);
2394           Index = N.getOperand(1);
2395           return true;
2396       }
2397     }
2398   }
2399   return false;
2400 }
2401 
2402 /// isIntS34Immediate - This method tests if value of node given can be
2403 /// accurately represented as a sign extension from a 34-bit value.  If so,
2404 /// this returns true and the immediate.
2405 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2406   if (!isa<ConstantSDNode>(N))
2407     return false;
2408 
2409   Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2410   return isInt<34>(Imm);
2411 }
2412 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2413   return isIntS34Immediate(Op.getNode(), Imm);
2414 }
2415 
2416 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2417 /// can be represented as an indexed [r+r] operation.  Returns false if it
2418 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2419 /// non-zero and N can be represented by a base register plus a signed 16-bit
2420 /// displacement, make a more precise judgement by checking (displacement % \p
2421 /// EncodingAlignment).
2422 bool PPCTargetLowering::SelectAddressRegReg(
2423     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2424     MaybeAlign EncodingAlignment) const {
2425   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2426   // a [pc+imm].
2427   if (SelectAddressPCRel(N, Base))
2428     return false;
2429 
2430   int16_t Imm = 0;
2431   if (N.getOpcode() == ISD::ADD) {
2432     // Is there any SPE load/store (f64), which can't handle 16bit offset?
2433     // SPE load/store can only handle 8-bit offsets.
2434     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2435         return true;
2436     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2437         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2438       return false; // r+i
2439     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2440       return false;    // r+i
2441 
2442     Base = N.getOperand(0);
2443     Index = N.getOperand(1);
2444     return true;
2445   } else if (N.getOpcode() == ISD::OR) {
2446     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2447         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2448       return false; // r+i can fold it if we can.
2449 
2450     // If this is an or of disjoint bitfields, we can codegen this as an add
2451     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2452     // disjoint.
2453     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2454 
2455     if (LHSKnown.Zero.getBoolValue()) {
2456       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2457       // If all of the bits are known zero on the LHS or RHS, the add won't
2458       // carry.
2459       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2460         Base = N.getOperand(0);
2461         Index = N.getOperand(1);
2462         return true;
2463       }
2464     }
2465   }
2466 
2467   return false;
2468 }
2469 
2470 // If we happen to be doing an i64 load or store into a stack slot that has
2471 // less than a 4-byte alignment, then the frame-index elimination may need to
2472 // use an indexed load or store instruction (because the offset may not be a
2473 // multiple of 4). The extra register needed to hold the offset comes from the
2474 // register scavenger, and it is possible that the scavenger will need to use
2475 // an emergency spill slot. As a result, we need to make sure that a spill slot
2476 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2477 // stack slot.
2478 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2479   // FIXME: This does not handle the LWA case.
2480   if (VT != MVT::i64)
2481     return;
2482 
2483   // NOTE: We'll exclude negative FIs here, which come from argument
2484   // lowering, because there are no known test cases triggering this problem
2485   // using packed structures (or similar). We can remove this exclusion if
2486   // we find such a test case. The reason why this is so test-case driven is
2487   // because this entire 'fixup' is only to prevent crashes (from the
2488   // register scavenger) on not-really-valid inputs. For example, if we have:
2489   //   %a = alloca i1
2490   //   %b = bitcast i1* %a to i64*
2491   //   store i64* a, i64 b
2492   // then the store should really be marked as 'align 1', but is not. If it
2493   // were marked as 'align 1' then the indexed form would have been
2494   // instruction-selected initially, and the problem this 'fixup' is preventing
2495   // won't happen regardless.
2496   if (FrameIdx < 0)
2497     return;
2498 
2499   MachineFunction &MF = DAG.getMachineFunction();
2500   MachineFrameInfo &MFI = MF.getFrameInfo();
2501 
2502   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2503     return;
2504 
2505   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2506   FuncInfo->setHasNonRISpills();
2507 }
2508 
2509 /// Returns true if the address N can be represented by a base register plus
2510 /// a signed 16-bit displacement [r+imm], and if it is not better
2511 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2512 /// displacements that are multiples of that value.
2513 bool PPCTargetLowering::SelectAddressRegImm(
2514     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2515     MaybeAlign EncodingAlignment) const {
2516   // FIXME dl should come from parent load or store, not from address
2517   SDLoc dl(N);
2518 
2519   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2520   // a [pc+imm].
2521   if (SelectAddressPCRel(N, Base))
2522     return false;
2523 
2524   // If this can be more profitably realized as r+r, fail.
2525   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2526     return false;
2527 
2528   if (N.getOpcode() == ISD::ADD) {
2529     int16_t imm = 0;
2530     if (isIntS16Immediate(N.getOperand(1), imm) &&
2531         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2532       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2533       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2534         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2535         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2536       } else {
2537         Base = N.getOperand(0);
2538       }
2539       return true; // [r+i]
2540     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2541       // Match LOAD (ADD (X, Lo(G))).
2542       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2543              && "Cannot handle constant offsets yet!");
2544       Disp = N.getOperand(1).getOperand(0);  // The global address.
2545       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2546              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2547              Disp.getOpcode() == ISD::TargetConstantPool ||
2548              Disp.getOpcode() == ISD::TargetJumpTable);
2549       Base = N.getOperand(0);
2550       return true;  // [&g+r]
2551     }
2552   } else if (N.getOpcode() == ISD::OR) {
2553     int16_t imm = 0;
2554     if (isIntS16Immediate(N.getOperand(1), imm) &&
2555         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2556       // If this is an or of disjoint bitfields, we can codegen this as an add
2557       // (for better address arithmetic) if the LHS and RHS of the OR are
2558       // provably disjoint.
2559       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2560 
2561       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2562         // If all of the bits are known zero on the LHS or RHS, the add won't
2563         // carry.
2564         if (FrameIndexSDNode *FI =
2565               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2566           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2567           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2568         } else {
2569           Base = N.getOperand(0);
2570         }
2571         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2572         return true;
2573       }
2574     }
2575   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2576     // Loading from a constant address.
2577 
2578     // If this address fits entirely in a 16-bit sext immediate field, codegen
2579     // this as "d, 0"
2580     int16_t Imm;
2581     if (isIntS16Immediate(CN, Imm) &&
2582         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2583       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2584       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2585                              CN->getValueType(0));
2586       return true;
2587     }
2588 
2589     // Handle 32-bit sext immediates with LIS + addr mode.
2590     if ((CN->getValueType(0) == MVT::i32 ||
2591          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2592         (!EncodingAlignment ||
2593          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2594       int Addr = (int)CN->getZExtValue();
2595 
2596       // Otherwise, break this down into an LIS + disp.
2597       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2598 
2599       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2600                                    MVT::i32);
2601       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2602       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2603       return true;
2604     }
2605   }
2606 
2607   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2608   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2609     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2610     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2611   } else
2612     Base = N;
2613   return true;      // [r+0]
2614 }
2615 
2616 /// Similar to the 16-bit case but for instructions that take a 34-bit
2617 /// displacement field (prefixed loads/stores).
2618 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2619                                               SDValue &Base,
2620                                               SelectionDAG &DAG) const {
2621   // Only on 64-bit targets.
2622   if (N.getValueType() != MVT::i64)
2623     return false;
2624 
2625   SDLoc dl(N);
2626   int64_t Imm = 0;
2627 
2628   if (N.getOpcode() == ISD::ADD) {
2629     if (!isIntS34Immediate(N.getOperand(1), Imm))
2630       return false;
2631     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2632     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2633       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2634     else
2635       Base = N.getOperand(0);
2636     return true;
2637   }
2638 
2639   if (N.getOpcode() == ISD::OR) {
2640     if (!isIntS34Immediate(N.getOperand(1), Imm))
2641       return false;
2642     // If this is an or of disjoint bitfields, we can codegen this as an add
2643     // (for better address arithmetic) if the LHS and RHS of the OR are
2644     // provably disjoint.
2645     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2646     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2647       return false;
2648     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2649       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2650     else
2651       Base = N.getOperand(0);
2652     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2653     return true;
2654   }
2655 
2656   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2657     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2658     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2659     return true;
2660   }
2661 
2662   return false;
2663 }
2664 
2665 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2666 /// represented as an indexed [r+r] operation.
2667 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2668                                                 SDValue &Index,
2669                                                 SelectionDAG &DAG) const {
2670   // Check to see if we can easily represent this as an [r+r] address.  This
2671   // will fail if it thinks that the address is more profitably represented as
2672   // reg+imm, e.g. where imm = 0.
2673   if (SelectAddressRegReg(N, Base, Index, DAG))
2674     return true;
2675 
2676   // If the address is the result of an add, we will utilize the fact that the
2677   // address calculation includes an implicit add.  However, we can reduce
2678   // register pressure if we do not materialize a constant just for use as the
2679   // index register.  We only get rid of the add if it is not an add of a
2680   // value and a 16-bit signed constant and both have a single use.
2681   int16_t imm = 0;
2682   if (N.getOpcode() == ISD::ADD &&
2683       (!isIntS16Immediate(N.getOperand(1), imm) ||
2684        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2685     Base = N.getOperand(0);
2686     Index = N.getOperand(1);
2687     return true;
2688   }
2689 
2690   // Otherwise, do it the hard way, using R0 as the base register.
2691   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2692                          N.getValueType());
2693   Index = N;
2694   return true;
2695 }
2696 
2697 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2698   Ty *PCRelCand = dyn_cast<Ty>(N);
2699   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2700 }
2701 
2702 /// Returns true if this address is a PC Relative address.
2703 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2704 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2705 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2706   // This is a materialize PC Relative node. Always select this as PC Relative.
2707   Base = N;
2708   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2709     return true;
2710   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2711       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2712       isValidPCRelNode<JumpTableSDNode>(N) ||
2713       isValidPCRelNode<BlockAddressSDNode>(N))
2714     return true;
2715   return false;
2716 }
2717 
2718 /// Returns true if we should use a direct load into vector instruction
2719 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2720 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2721 
2722   // If there are any other uses other than scalar to vector, then we should
2723   // keep it as a scalar load -> direct move pattern to prevent multiple
2724   // loads.
2725   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2726   if (!LD)
2727     return false;
2728 
2729   EVT MemVT = LD->getMemoryVT();
2730   if (!MemVT.isSimple())
2731     return false;
2732   switch(MemVT.getSimpleVT().SimpleTy) {
2733   case MVT::i64:
2734     break;
2735   case MVT::i32:
2736     if (!ST.hasP8Vector())
2737       return false;
2738     break;
2739   case MVT::i16:
2740   case MVT::i8:
2741     if (!ST.hasP9Vector())
2742       return false;
2743     break;
2744   default:
2745     return false;
2746   }
2747 
2748   SDValue LoadedVal(N, 0);
2749   if (!LoadedVal.hasOneUse())
2750     return false;
2751 
2752   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2753        UI != UE; ++UI)
2754     if (UI.getUse().get().getResNo() == 0 &&
2755         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2756         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2757       return false;
2758 
2759   return true;
2760 }
2761 
2762 /// getPreIndexedAddressParts - returns true by value, base pointer and
2763 /// offset pointer and addressing mode by reference if the node's address
2764 /// can be legally represented as pre-indexed load / store address.
2765 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2766                                                   SDValue &Offset,
2767                                                   ISD::MemIndexedMode &AM,
2768                                                   SelectionDAG &DAG) const {
2769   if (DisablePPCPreinc) return false;
2770 
2771   bool isLoad = true;
2772   SDValue Ptr;
2773   EVT VT;
2774   unsigned Alignment;
2775   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2776     Ptr = LD->getBasePtr();
2777     VT = LD->getMemoryVT();
2778     Alignment = LD->getAlignment();
2779   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2780     Ptr = ST->getBasePtr();
2781     VT  = ST->getMemoryVT();
2782     Alignment = ST->getAlignment();
2783     isLoad = false;
2784   } else
2785     return false;
2786 
2787   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2788   // instructions because we can fold these into a more efficient instruction
2789   // instead, (such as LXSD).
2790   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2791     return false;
2792   }
2793 
2794   // PowerPC doesn't have preinc load/store instructions for vectors
2795   if (VT.isVector())
2796     return false;
2797 
2798   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2799     // Common code will reject creating a pre-inc form if the base pointer
2800     // is a frame index, or if N is a store and the base pointer is either
2801     // the same as or a predecessor of the value being stored.  Check for
2802     // those situations here, and try with swapped Base/Offset instead.
2803     bool Swap = false;
2804 
2805     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2806       Swap = true;
2807     else if (!isLoad) {
2808       SDValue Val = cast<StoreSDNode>(N)->getValue();
2809       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2810         Swap = true;
2811     }
2812 
2813     if (Swap)
2814       std::swap(Base, Offset);
2815 
2816     AM = ISD::PRE_INC;
2817     return true;
2818   }
2819 
2820   // LDU/STU can only handle immediates that are a multiple of 4.
2821   if (VT != MVT::i64) {
2822     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2823       return false;
2824   } else {
2825     // LDU/STU need an address with at least 4-byte alignment.
2826     if (Alignment < 4)
2827       return false;
2828 
2829     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2830       return false;
2831   }
2832 
2833   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2834     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2835     // sext i32 to i64 when addr mode is r+i.
2836     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2837         LD->getExtensionType() == ISD::SEXTLOAD &&
2838         isa<ConstantSDNode>(Offset))
2839       return false;
2840   }
2841 
2842   AM = ISD::PRE_INC;
2843   return true;
2844 }
2845 
2846 //===----------------------------------------------------------------------===//
2847 //  LowerOperation implementation
2848 //===----------------------------------------------------------------------===//
2849 
2850 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2851 /// and LoOpFlags to the target MO flags.
2852 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2853                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2854                                const GlobalValue *GV = nullptr) {
2855   HiOpFlags = PPCII::MO_HA;
2856   LoOpFlags = PPCII::MO_LO;
2857 
2858   // Don't use the pic base if not in PIC relocation model.
2859   if (IsPIC) {
2860     HiOpFlags |= PPCII::MO_PIC_FLAG;
2861     LoOpFlags |= PPCII::MO_PIC_FLAG;
2862   }
2863 }
2864 
2865 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2866                              SelectionDAG &DAG) {
2867   SDLoc DL(HiPart);
2868   EVT PtrVT = HiPart.getValueType();
2869   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2870 
2871   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2872   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2873 
2874   // With PIC, the first instruction is actually "GR+hi(&G)".
2875   if (isPIC)
2876     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2877                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2878 
2879   // Generate non-pic code that has direct accesses to the constant pool.
2880   // The address of the global is just (hi(&g)+lo(&g)).
2881   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2882 }
2883 
2884 static void setUsesTOCBasePtr(MachineFunction &MF) {
2885   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2886   FuncInfo->setUsesTOCBasePtr();
2887 }
2888 
2889 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2890   setUsesTOCBasePtr(DAG.getMachineFunction());
2891 }
2892 
2893 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2894                                        SDValue GA) const {
2895   const bool Is64Bit = Subtarget.isPPC64();
2896   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2897   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2898                         : Subtarget.isAIXABI()
2899                               ? DAG.getRegister(PPC::R2, VT)
2900                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2901   SDValue Ops[] = { GA, Reg };
2902   return DAG.getMemIntrinsicNode(
2903       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2904       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2905       MachineMemOperand::MOLoad);
2906 }
2907 
2908 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2909                                              SelectionDAG &DAG) const {
2910   EVT PtrVT = Op.getValueType();
2911   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2912   const Constant *C = CP->getConstVal();
2913 
2914   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2915   // The actual address of the GlobalValue is stored in the TOC.
2916   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2917     if (Subtarget.isUsingPCRelativeCalls()) {
2918       SDLoc DL(CP);
2919       EVT Ty = getPointerTy(DAG.getDataLayout());
2920       SDValue ConstPool = DAG.getTargetConstantPool(
2921           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2922       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2923     }
2924     setUsesTOCBasePtr(DAG);
2925     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2926     return getTOCEntry(DAG, SDLoc(CP), GA);
2927   }
2928 
2929   unsigned MOHiFlag, MOLoFlag;
2930   bool IsPIC = isPositionIndependent();
2931   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2932 
2933   if (IsPIC && Subtarget.isSVR4ABI()) {
2934     SDValue GA =
2935         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2936     return getTOCEntry(DAG, SDLoc(CP), GA);
2937   }
2938 
2939   SDValue CPIHi =
2940       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2941   SDValue CPILo =
2942       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2943   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2944 }
2945 
2946 // For 64-bit PowerPC, prefer the more compact relative encodings.
2947 // This trades 32 bits per jump table entry for one or two instructions
2948 // on the jump site.
2949 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2950   if (isJumpTableRelative())
2951     return MachineJumpTableInfo::EK_LabelDifference32;
2952 
2953   return TargetLowering::getJumpTableEncoding();
2954 }
2955 
2956 bool PPCTargetLowering::isJumpTableRelative() const {
2957   if (UseAbsoluteJumpTables)
2958     return false;
2959   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2960     return true;
2961   return TargetLowering::isJumpTableRelative();
2962 }
2963 
2964 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2965                                                     SelectionDAG &DAG) const {
2966   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2967     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2968 
2969   switch (getTargetMachine().getCodeModel()) {
2970   case CodeModel::Small:
2971   case CodeModel::Medium:
2972     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2973   default:
2974     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2975                        getPointerTy(DAG.getDataLayout()));
2976   }
2977 }
2978 
2979 const MCExpr *
2980 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2981                                                 unsigned JTI,
2982                                                 MCContext &Ctx) const {
2983   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2984     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2985 
2986   switch (getTargetMachine().getCodeModel()) {
2987   case CodeModel::Small:
2988   case CodeModel::Medium:
2989     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2990   default:
2991     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2992   }
2993 }
2994 
2995 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2996   EVT PtrVT = Op.getValueType();
2997   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2998 
2999   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3000   if (Subtarget.isUsingPCRelativeCalls()) {
3001     SDLoc DL(JT);
3002     EVT Ty = getPointerTy(DAG.getDataLayout());
3003     SDValue GA =
3004         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3005     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3006     return MatAddr;
3007   }
3008 
3009   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3010   // The actual address of the GlobalValue is stored in the TOC.
3011   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3012     setUsesTOCBasePtr(DAG);
3013     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3014     return getTOCEntry(DAG, SDLoc(JT), GA);
3015   }
3016 
3017   unsigned MOHiFlag, MOLoFlag;
3018   bool IsPIC = isPositionIndependent();
3019   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3020 
3021   if (IsPIC && Subtarget.isSVR4ABI()) {
3022     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3023                                         PPCII::MO_PIC_FLAG);
3024     return getTOCEntry(DAG, SDLoc(GA), GA);
3025   }
3026 
3027   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3028   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3029   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3030 }
3031 
3032 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3033                                              SelectionDAG &DAG) const {
3034   EVT PtrVT = Op.getValueType();
3035   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3036   const BlockAddress *BA = BASDN->getBlockAddress();
3037 
3038   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3039   if (Subtarget.isUsingPCRelativeCalls()) {
3040     SDLoc DL(BASDN);
3041     EVT Ty = getPointerTy(DAG.getDataLayout());
3042     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3043                                            PPCII::MO_PCREL_FLAG);
3044     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3045     return MatAddr;
3046   }
3047 
3048   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3049   // The actual BlockAddress is stored in the TOC.
3050   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3051     setUsesTOCBasePtr(DAG);
3052     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3053     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3054   }
3055 
3056   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3057   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3058     return getTOCEntry(
3059         DAG, SDLoc(BASDN),
3060         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3061 
3062   unsigned MOHiFlag, MOLoFlag;
3063   bool IsPIC = isPositionIndependent();
3064   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3065   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3066   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3067   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3068 }
3069 
3070 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3071                                               SelectionDAG &DAG) const {
3072   // FIXME: TLS addresses currently use medium model code sequences,
3073   // which is the most useful form.  Eventually support for small and
3074   // large models could be added if users need it, at the cost of
3075   // additional complexity.
3076   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3077   if (DAG.getTarget().useEmulatedTLS())
3078     return LowerToTLSEmulatedModel(GA, DAG);
3079 
3080   SDLoc dl(GA);
3081   const GlobalValue *GV = GA->getGlobal();
3082   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3083   bool is64bit = Subtarget.isPPC64();
3084   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3085   PICLevel::Level picLevel = M->getPICLevel();
3086 
3087   const TargetMachine &TM = getTargetMachine();
3088   TLSModel::Model Model = TM.getTLSModel(GV);
3089 
3090   if (Model == TLSModel::LocalExec) {
3091     if (Subtarget.isUsingPCRelativeCalls()) {
3092       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3093       SDValue TGA = DAG.getTargetGlobalAddress(
3094           GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3095       SDValue MatAddr =
3096           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3097       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3098     }
3099 
3100     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3101                                                PPCII::MO_TPREL_HA);
3102     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3103                                                PPCII::MO_TPREL_LO);
3104     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3105                              : DAG.getRegister(PPC::R2, MVT::i32);
3106 
3107     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3108     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3109   }
3110 
3111   if (Model == TLSModel::InitialExec) {
3112     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3113     SDValue TGA = DAG.getTargetGlobalAddress(
3114         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3115     SDValue TGATLS = DAG.getTargetGlobalAddress(
3116         GV, dl, PtrVT, 0,
3117         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3118     SDValue TPOffset;
3119     if (IsPCRel) {
3120       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3121       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3122                              MachinePointerInfo());
3123     } else {
3124       SDValue GOTPtr;
3125       if (is64bit) {
3126         setUsesTOCBasePtr(DAG);
3127         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3128         GOTPtr =
3129             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3130       } else {
3131         if (!TM.isPositionIndependent())
3132           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3133         else if (picLevel == PICLevel::SmallPIC)
3134           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3135         else
3136           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3137       }
3138       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3139     }
3140     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3141   }
3142 
3143   if (Model == TLSModel::GeneralDynamic) {
3144     if (Subtarget.isUsingPCRelativeCalls()) {
3145       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3146                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3147       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3148     }
3149 
3150     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3151     SDValue GOTPtr;
3152     if (is64bit) {
3153       setUsesTOCBasePtr(DAG);
3154       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3155       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3156                                    GOTReg, TGA);
3157     } else {
3158       if (picLevel == PICLevel::SmallPIC)
3159         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3160       else
3161         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3162     }
3163     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3164                        GOTPtr, TGA, TGA);
3165   }
3166 
3167   if (Model == TLSModel::LocalDynamic) {
3168     if (Subtarget.isUsingPCRelativeCalls()) {
3169       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3170                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3171       SDValue MatPCRel =
3172           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3173       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3174     }
3175 
3176     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3177     SDValue GOTPtr;
3178     if (is64bit) {
3179       setUsesTOCBasePtr(DAG);
3180       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3181       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3182                            GOTReg, TGA);
3183     } else {
3184       if (picLevel == PICLevel::SmallPIC)
3185         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3186       else
3187         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3188     }
3189     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3190                                   PtrVT, GOTPtr, TGA, TGA);
3191     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3192                                       PtrVT, TLSAddr, TGA);
3193     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3194   }
3195 
3196   llvm_unreachable("Unknown TLS model!");
3197 }
3198 
3199 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3200                                               SelectionDAG &DAG) const {
3201   EVT PtrVT = Op.getValueType();
3202   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3203   SDLoc DL(GSDN);
3204   const GlobalValue *GV = GSDN->getGlobal();
3205 
3206   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3207   // The actual address of the GlobalValue is stored in the TOC.
3208   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3209     if (Subtarget.isUsingPCRelativeCalls()) {
3210       EVT Ty = getPointerTy(DAG.getDataLayout());
3211       if (isAccessedAsGotIndirect(Op)) {
3212         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3213                                                 PPCII::MO_PCREL_FLAG |
3214                                                     PPCII::MO_GOT_FLAG);
3215         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3216         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3217                                    MachinePointerInfo());
3218         return Load;
3219       } else {
3220         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3221                                                 PPCII::MO_PCREL_FLAG);
3222         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3223       }
3224     }
3225     setUsesTOCBasePtr(DAG);
3226     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3227     return getTOCEntry(DAG, DL, GA);
3228   }
3229 
3230   unsigned MOHiFlag, MOLoFlag;
3231   bool IsPIC = isPositionIndependent();
3232   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3233 
3234   if (IsPIC && Subtarget.isSVR4ABI()) {
3235     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3236                                             GSDN->getOffset(),
3237                                             PPCII::MO_PIC_FLAG);
3238     return getTOCEntry(DAG, DL, GA);
3239   }
3240 
3241   SDValue GAHi =
3242     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3243   SDValue GALo =
3244     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3245 
3246   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3247 }
3248 
3249 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3250   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3251   SDLoc dl(Op);
3252 
3253   if (Op.getValueType() == MVT::v2i64) {
3254     // When the operands themselves are v2i64 values, we need to do something
3255     // special because VSX has no underlying comparison operations for these.
3256     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3257       // Equality can be handled by casting to the legal type for Altivec
3258       // comparisons, everything else needs to be expanded.
3259       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3260         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3261                  DAG.getSetCC(dl, MVT::v4i32,
3262                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3263                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3264                    CC));
3265       }
3266 
3267       return SDValue();
3268     }
3269 
3270     // We handle most of these in the usual way.
3271     return Op;
3272   }
3273 
3274   // If we're comparing for equality to zero, expose the fact that this is
3275   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3276   // fold the new nodes.
3277   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3278     return V;
3279 
3280   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3281     // Leave comparisons against 0 and -1 alone for now, since they're usually
3282     // optimized.  FIXME: revisit this when we can custom lower all setcc
3283     // optimizations.
3284     if (C->isAllOnesValue() || C->isNullValue())
3285       return SDValue();
3286   }
3287 
3288   // If we have an integer seteq/setne, turn it into a compare against zero
3289   // by xor'ing the rhs with the lhs, which is faster than setting a
3290   // condition register, reading it back out, and masking the correct bit.  The
3291   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3292   // the result to other bit-twiddling opportunities.
3293   EVT LHSVT = Op.getOperand(0).getValueType();
3294   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3295     EVT VT = Op.getValueType();
3296     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3297                                 Op.getOperand(1));
3298     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3299   }
3300   return SDValue();
3301 }
3302 
3303 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3304   SDNode *Node = Op.getNode();
3305   EVT VT = Node->getValueType(0);
3306   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3307   SDValue InChain = Node->getOperand(0);
3308   SDValue VAListPtr = Node->getOperand(1);
3309   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3310   SDLoc dl(Node);
3311 
3312   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3313 
3314   // gpr_index
3315   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3316                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3317   InChain = GprIndex.getValue(1);
3318 
3319   if (VT == MVT::i64) {
3320     // Check if GprIndex is even
3321     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3322                                  DAG.getConstant(1, dl, MVT::i32));
3323     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3324                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3325     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3326                                           DAG.getConstant(1, dl, MVT::i32));
3327     // Align GprIndex to be even if it isn't
3328     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3329                            GprIndex);
3330   }
3331 
3332   // fpr index is 1 byte after gpr
3333   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3334                                DAG.getConstant(1, dl, MVT::i32));
3335 
3336   // fpr
3337   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3338                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3339   InChain = FprIndex.getValue(1);
3340 
3341   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3342                                        DAG.getConstant(8, dl, MVT::i32));
3343 
3344   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3345                                         DAG.getConstant(4, dl, MVT::i32));
3346 
3347   // areas
3348   SDValue OverflowArea =
3349       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3350   InChain = OverflowArea.getValue(1);
3351 
3352   SDValue RegSaveArea =
3353       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3354   InChain = RegSaveArea.getValue(1);
3355 
3356   // select overflow_area if index > 8
3357   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3358                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3359 
3360   // adjustment constant gpr_index * 4/8
3361   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3362                                     VT.isInteger() ? GprIndex : FprIndex,
3363                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3364                                                     MVT::i32));
3365 
3366   // OurReg = RegSaveArea + RegConstant
3367   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3368                                RegConstant);
3369 
3370   // Floating types are 32 bytes into RegSaveArea
3371   if (VT.isFloatingPoint())
3372     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3373                          DAG.getConstant(32, dl, MVT::i32));
3374 
3375   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3376   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3377                                    VT.isInteger() ? GprIndex : FprIndex,
3378                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3379                                                    MVT::i32));
3380 
3381   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3382                               VT.isInteger() ? VAListPtr : FprPtr,
3383                               MachinePointerInfo(SV), MVT::i8);
3384 
3385   // determine if we should load from reg_save_area or overflow_area
3386   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3387 
3388   // increase overflow_area by 4/8 if gpr/fpr > 8
3389   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3390                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3391                                           dl, MVT::i32));
3392 
3393   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3394                              OverflowAreaPlusN);
3395 
3396   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3397                               MachinePointerInfo(), MVT::i32);
3398 
3399   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3400 }
3401 
3402 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3403   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3404 
3405   // We have to copy the entire va_list struct:
3406   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3407   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3408                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3409                        false, true, false, MachinePointerInfo(),
3410                        MachinePointerInfo());
3411 }
3412 
3413 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3414                                                   SelectionDAG &DAG) const {
3415   if (Subtarget.isAIXABI())
3416     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3417 
3418   return Op.getOperand(0);
3419 }
3420 
3421 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3422                                                 SelectionDAG &DAG) const {
3423   if (Subtarget.isAIXABI())
3424     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3425 
3426   SDValue Chain = Op.getOperand(0);
3427   SDValue Trmp = Op.getOperand(1); // trampoline
3428   SDValue FPtr = Op.getOperand(2); // nested function
3429   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3430   SDLoc dl(Op);
3431 
3432   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3433   bool isPPC64 = (PtrVT == MVT::i64);
3434   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3435 
3436   TargetLowering::ArgListTy Args;
3437   TargetLowering::ArgListEntry Entry;
3438 
3439   Entry.Ty = IntPtrTy;
3440   Entry.Node = Trmp; Args.push_back(Entry);
3441 
3442   // TrampSize == (isPPC64 ? 48 : 40);
3443   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3444                                isPPC64 ? MVT::i64 : MVT::i32);
3445   Args.push_back(Entry);
3446 
3447   Entry.Node = FPtr; Args.push_back(Entry);
3448   Entry.Node = Nest; Args.push_back(Entry);
3449 
3450   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3451   TargetLowering::CallLoweringInfo CLI(DAG);
3452   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3453       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3454       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3455 
3456   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3457   return CallResult.second;
3458 }
3459 
3460 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3461   MachineFunction &MF = DAG.getMachineFunction();
3462   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3463   EVT PtrVT = getPointerTy(MF.getDataLayout());
3464 
3465   SDLoc dl(Op);
3466 
3467   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3468     // vastart just stores the address of the VarArgsFrameIndex slot into the
3469     // memory location argument.
3470     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3471     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3472     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3473                         MachinePointerInfo(SV));
3474   }
3475 
3476   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3477   // We suppose the given va_list is already allocated.
3478   //
3479   // typedef struct {
3480   //  char gpr;     /* index into the array of 8 GPRs
3481   //                 * stored in the register save area
3482   //                 * gpr=0 corresponds to r3,
3483   //                 * gpr=1 to r4, etc.
3484   //                 */
3485   //  char fpr;     /* index into the array of 8 FPRs
3486   //                 * stored in the register save area
3487   //                 * fpr=0 corresponds to f1,
3488   //                 * fpr=1 to f2, etc.
3489   //                 */
3490   //  char *overflow_arg_area;
3491   //                /* location on stack that holds
3492   //                 * the next overflow argument
3493   //                 */
3494   //  char *reg_save_area;
3495   //               /* where r3:r10 and f1:f8 (if saved)
3496   //                * are stored
3497   //                */
3498   // } va_list[1];
3499 
3500   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3501   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3502   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3503                                             PtrVT);
3504   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3505                                  PtrVT);
3506 
3507   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3508   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3509 
3510   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3511   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3512 
3513   uint64_t FPROffset = 1;
3514   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3515 
3516   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3517 
3518   // Store first byte : number of int regs
3519   SDValue firstStore =
3520       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3521                         MachinePointerInfo(SV), MVT::i8);
3522   uint64_t nextOffset = FPROffset;
3523   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3524                                   ConstFPROffset);
3525 
3526   // Store second byte : number of float regs
3527   SDValue secondStore =
3528       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3529                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3530   nextOffset += StackOffset;
3531   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3532 
3533   // Store second word : arguments given on stack
3534   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3535                                     MachinePointerInfo(SV, nextOffset));
3536   nextOffset += FrameOffset;
3537   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3538 
3539   // Store third word : arguments given in registers
3540   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3541                       MachinePointerInfo(SV, nextOffset));
3542 }
3543 
3544 /// FPR - The set of FP registers that should be allocated for arguments
3545 /// on Darwin and AIX.
3546 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3547                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3548                                 PPC::F11, PPC::F12, PPC::F13};
3549 
3550 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3551 /// the stack.
3552 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3553                                        unsigned PtrByteSize) {
3554   unsigned ArgSize = ArgVT.getStoreSize();
3555   if (Flags.isByVal())
3556     ArgSize = Flags.getByValSize();
3557 
3558   // Round up to multiples of the pointer size, except for array members,
3559   // which are always packed.
3560   if (!Flags.isInConsecutiveRegs())
3561     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3562 
3563   return ArgSize;
3564 }
3565 
3566 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3567 /// on the stack.
3568 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3569                                          ISD::ArgFlagsTy Flags,
3570                                          unsigned PtrByteSize) {
3571   Align Alignment(PtrByteSize);
3572 
3573   // Altivec parameters are padded to a 16 byte boundary.
3574   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3575       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3576       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3577       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3578     Alignment = Align(16);
3579 
3580   // ByVal parameters are aligned as requested.
3581   if (Flags.isByVal()) {
3582     auto BVAlign = Flags.getNonZeroByValAlign();
3583     if (BVAlign > PtrByteSize) {
3584       if (BVAlign.value() % PtrByteSize != 0)
3585         llvm_unreachable(
3586             "ByVal alignment is not a multiple of the pointer size");
3587 
3588       Alignment = BVAlign;
3589     }
3590   }
3591 
3592   // Array members are always packed to their original alignment.
3593   if (Flags.isInConsecutiveRegs()) {
3594     // If the array member was split into multiple registers, the first
3595     // needs to be aligned to the size of the full type.  (Except for
3596     // ppcf128, which is only aligned as its f64 components.)
3597     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3598       Alignment = Align(OrigVT.getStoreSize());
3599     else
3600       Alignment = Align(ArgVT.getStoreSize());
3601   }
3602 
3603   return Alignment;
3604 }
3605 
3606 /// CalculateStackSlotUsed - Return whether this argument will use its
3607 /// stack slot (instead of being passed in registers).  ArgOffset,
3608 /// AvailableFPRs, and AvailableVRs must hold the current argument
3609 /// position, and will be updated to account for this argument.
3610 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3611                                    unsigned PtrByteSize, unsigned LinkageSize,
3612                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3613                                    unsigned &AvailableFPRs,
3614                                    unsigned &AvailableVRs) {
3615   bool UseMemory = false;
3616 
3617   // Respect alignment of argument on the stack.
3618   Align Alignment =
3619       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3620   ArgOffset = alignTo(ArgOffset, Alignment);
3621   // If there's no space left in the argument save area, we must
3622   // use memory (this check also catches zero-sized arguments).
3623   if (ArgOffset >= LinkageSize + ParamAreaSize)
3624     UseMemory = true;
3625 
3626   // Allocate argument on the stack.
3627   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3628   if (Flags.isInConsecutiveRegsLast())
3629     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3630   // If we overran the argument save area, we must use memory
3631   // (this check catches arguments passed partially in memory)
3632   if (ArgOffset > LinkageSize + ParamAreaSize)
3633     UseMemory = true;
3634 
3635   // However, if the argument is actually passed in an FPR or a VR,
3636   // we don't use memory after all.
3637   if (!Flags.isByVal()) {
3638     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3639       if (AvailableFPRs > 0) {
3640         --AvailableFPRs;
3641         return false;
3642       }
3643     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3644         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3645         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3646         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3647       if (AvailableVRs > 0) {
3648         --AvailableVRs;
3649         return false;
3650       }
3651   }
3652 
3653   return UseMemory;
3654 }
3655 
3656 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3657 /// ensure minimum alignment required for target.
3658 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3659                                      unsigned NumBytes) {
3660   return alignTo(NumBytes, Lowering->getStackAlign());
3661 }
3662 
3663 SDValue PPCTargetLowering::LowerFormalArguments(
3664     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3665     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3666     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3667   if (Subtarget.isAIXABI())
3668     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3669                                     InVals);
3670   if (Subtarget.is64BitELFABI())
3671     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3672                                        InVals);
3673   if (Subtarget.is32BitELFABI())
3674     return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3675                                        InVals);
3676 
3677   return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3678                                      InVals);
3679 }
3680 
3681 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3682     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3683     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3684     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3685 
3686   // 32-bit SVR4 ABI Stack Frame Layout:
3687   //              +-----------------------------------+
3688   //        +-->  |            Back chain             |
3689   //        |     +-----------------------------------+
3690   //        |     | Floating-point register save area |
3691   //        |     +-----------------------------------+
3692   //        |     |    General register save area     |
3693   //        |     +-----------------------------------+
3694   //        |     |          CR save word             |
3695   //        |     +-----------------------------------+
3696   //        |     |         VRSAVE save word          |
3697   //        |     +-----------------------------------+
3698   //        |     |         Alignment padding         |
3699   //        |     +-----------------------------------+
3700   //        |     |     Vector register save area     |
3701   //        |     +-----------------------------------+
3702   //        |     |       Local variable space        |
3703   //        |     +-----------------------------------+
3704   //        |     |        Parameter list area        |
3705   //        |     +-----------------------------------+
3706   //        |     |           LR save word            |
3707   //        |     +-----------------------------------+
3708   // SP-->  +---  |            Back chain             |
3709   //              +-----------------------------------+
3710   //
3711   // Specifications:
3712   //   System V Application Binary Interface PowerPC Processor Supplement
3713   //   AltiVec Technology Programming Interface Manual
3714 
3715   MachineFunction &MF = DAG.getMachineFunction();
3716   MachineFrameInfo &MFI = MF.getFrameInfo();
3717   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3718 
3719   EVT PtrVT = getPointerTy(MF.getDataLayout());
3720   // Potential tail calls could cause overwriting of argument stack slots.
3721   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3722                        (CallConv == CallingConv::Fast));
3723   const Align PtrAlign(4);
3724 
3725   // Assign locations to all of the incoming arguments.
3726   SmallVector<CCValAssign, 16> ArgLocs;
3727   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3728                  *DAG.getContext());
3729 
3730   // Reserve space for the linkage area on the stack.
3731   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3732   CCInfo.AllocateStack(LinkageSize, PtrAlign);
3733   if (useSoftFloat())
3734     CCInfo.PreAnalyzeFormalArguments(Ins);
3735 
3736   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3737   CCInfo.clearWasPPCF128();
3738 
3739   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3740     CCValAssign &VA = ArgLocs[i];
3741 
3742     // Arguments stored in registers.
3743     if (VA.isRegLoc()) {
3744       const TargetRegisterClass *RC;
3745       EVT ValVT = VA.getValVT();
3746 
3747       switch (ValVT.getSimpleVT().SimpleTy) {
3748         default:
3749           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3750         case MVT::i1:
3751         case MVT::i32:
3752           RC = &PPC::GPRCRegClass;
3753           break;
3754         case MVT::f32:
3755           if (Subtarget.hasP8Vector())
3756             RC = &PPC::VSSRCRegClass;
3757           else if (Subtarget.hasSPE())
3758             RC = &PPC::GPRCRegClass;
3759           else
3760             RC = &PPC::F4RCRegClass;
3761           break;
3762         case MVT::f64:
3763           if (Subtarget.hasVSX())
3764             RC = &PPC::VSFRCRegClass;
3765           else if (Subtarget.hasSPE())
3766             // SPE passes doubles in GPR pairs.
3767             RC = &PPC::GPRCRegClass;
3768           else
3769             RC = &PPC::F8RCRegClass;
3770           break;
3771         case MVT::v16i8:
3772         case MVT::v8i16:
3773         case MVT::v4i32:
3774           RC = &PPC::VRRCRegClass;
3775           break;
3776         case MVT::v4f32:
3777           RC = &PPC::VRRCRegClass;
3778           break;
3779         case MVT::v2f64:
3780         case MVT::v2i64:
3781           RC = &PPC::VRRCRegClass;
3782           break;
3783       }
3784 
3785       SDValue ArgValue;
3786       // Transform the arguments stored in physical registers into
3787       // virtual ones.
3788       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3789         assert(i + 1 < e && "No second half of double precision argument");
3790         unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3791         unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3792         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3793         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3794         if (!Subtarget.isLittleEndian())
3795           std::swap (ArgValueLo, ArgValueHi);
3796         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3797                                ArgValueHi);
3798       } else {
3799         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3800         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3801                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
3802         if (ValVT == MVT::i1)
3803           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3804       }
3805 
3806       InVals.push_back(ArgValue);
3807     } else {
3808       // Argument stored in memory.
3809       assert(VA.isMemLoc());
3810 
3811       // Get the extended size of the argument type in stack
3812       unsigned ArgSize = VA.getLocVT().getStoreSize();
3813       // Get the actual size of the argument type
3814       unsigned ObjSize = VA.getValVT().getStoreSize();
3815       unsigned ArgOffset = VA.getLocMemOffset();
3816       // Stack objects in PPC32 are right justified.
3817       ArgOffset += ArgSize - ObjSize;
3818       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3819 
3820       // Create load nodes to retrieve arguments from the stack.
3821       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3822       InVals.push_back(
3823           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3824     }
3825   }
3826 
3827   // Assign locations to all of the incoming aggregate by value arguments.
3828   // Aggregates passed by value are stored in the local variable space of the
3829   // caller's stack frame, right above the parameter list area.
3830   SmallVector<CCValAssign, 16> ByValArgLocs;
3831   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3832                       ByValArgLocs, *DAG.getContext());
3833 
3834   // Reserve stack space for the allocations in CCInfo.
3835   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3836 
3837   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3838 
3839   // Area that is at least reserved in the caller of this function.
3840   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3841   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3842 
3843   // Set the size that is at least reserved in caller of this function.  Tail
3844   // call optimized function's reserved stack space needs to be aligned so that
3845   // taking the difference between two stack areas will result in an aligned
3846   // stack.
3847   MinReservedArea =
3848       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3849   FuncInfo->setMinReservedArea(MinReservedArea);
3850 
3851   SmallVector<SDValue, 8> MemOps;
3852 
3853   // If the function takes variable number of arguments, make a frame index for
3854   // the start of the first vararg value... for expansion of llvm.va_start.
3855   if (isVarArg) {
3856     static const MCPhysReg GPArgRegs[] = {
3857       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3858       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3859     };
3860     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3861 
3862     static const MCPhysReg FPArgRegs[] = {
3863       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3864       PPC::F8
3865     };
3866     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3867 
3868     if (useSoftFloat() || hasSPE())
3869        NumFPArgRegs = 0;
3870 
3871     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3872     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3873 
3874     // Make room for NumGPArgRegs and NumFPArgRegs.
3875     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3876                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3877 
3878     FuncInfo->setVarArgsStackOffset(
3879       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3880                             CCInfo.getNextStackOffset(), true));
3881 
3882     FuncInfo->setVarArgsFrameIndex(
3883         MFI.CreateStackObject(Depth, Align(8), false));
3884     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3885 
3886     // The fixed integer arguments of a variadic function are stored to the
3887     // VarArgsFrameIndex on the stack so that they may be loaded by
3888     // dereferencing the result of va_next.
3889     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3890       // Get an existing live-in vreg, or add a new one.
3891       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3892       if (!VReg)
3893         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3894 
3895       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3896       SDValue Store =
3897           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3898       MemOps.push_back(Store);
3899       // Increment the address by four for the next argument to store
3900       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3901       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3902     }
3903 
3904     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3905     // is set.
3906     // The double arguments are stored to the VarArgsFrameIndex
3907     // on the stack.
3908     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3909       // Get an existing live-in vreg, or add a new one.
3910       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3911       if (!VReg)
3912         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3913 
3914       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3915       SDValue Store =
3916           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3917       MemOps.push_back(Store);
3918       // Increment the address by eight for the next argument to store
3919       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3920                                          PtrVT);
3921       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3922     }
3923   }
3924 
3925   if (!MemOps.empty())
3926     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3927 
3928   return Chain;
3929 }
3930 
3931 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3932 // value to MVT::i64 and then truncate to the correct register size.
3933 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3934                                              EVT ObjectVT, SelectionDAG &DAG,
3935                                              SDValue ArgVal,
3936                                              const SDLoc &dl) const {
3937   if (Flags.isSExt())
3938     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3939                          DAG.getValueType(ObjectVT));
3940   else if (Flags.isZExt())
3941     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3942                          DAG.getValueType(ObjectVT));
3943 
3944   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3945 }
3946 
3947 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3948     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3949     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3950     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3951   // TODO: add description of PPC stack frame format, or at least some docs.
3952   //
3953   bool isELFv2ABI = Subtarget.isELFv2ABI();
3954   bool isLittleEndian = Subtarget.isLittleEndian();
3955   MachineFunction &MF = DAG.getMachineFunction();
3956   MachineFrameInfo &MFI = MF.getFrameInfo();
3957   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3958 
3959   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3960          "fastcc not supported on varargs functions");
3961 
3962   EVT PtrVT = getPointerTy(MF.getDataLayout());
3963   // Potential tail calls could cause overwriting of argument stack slots.
3964   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3965                        (CallConv == CallingConv::Fast));
3966   unsigned PtrByteSize = 8;
3967   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3968 
3969   static const MCPhysReg GPR[] = {
3970     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3971     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3972   };
3973   static const MCPhysReg VR[] = {
3974     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3975     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3976   };
3977 
3978   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3979   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3980   const unsigned Num_VR_Regs  = array_lengthof(VR);
3981 
3982   // Do a first pass over the arguments to determine whether the ABI
3983   // guarantees that our caller has allocated the parameter save area
3984   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3985   // in the ELFv2 ABI, it is true if this is a vararg function or if
3986   // any parameter is located in a stack slot.
3987 
3988   bool HasParameterArea = !isELFv2ABI || isVarArg;
3989   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3990   unsigned NumBytes = LinkageSize;
3991   unsigned AvailableFPRs = Num_FPR_Regs;
3992   unsigned AvailableVRs = Num_VR_Regs;
3993   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3994     if (Ins[i].Flags.isNest())
3995       continue;
3996 
3997     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3998                                PtrByteSize, LinkageSize, ParamAreaSize,
3999                                NumBytes, AvailableFPRs, AvailableVRs))
4000       HasParameterArea = true;
4001   }
4002 
4003   // Add DAG nodes to load the arguments or copy them out of registers.  On
4004   // entry to a function on PPC, the arguments start after the linkage area,
4005   // although the first ones are often in registers.
4006 
4007   unsigned ArgOffset = LinkageSize;
4008   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4009   SmallVector<SDValue, 8> MemOps;
4010   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4011   unsigned CurArgIdx = 0;
4012   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4013     SDValue ArgVal;
4014     bool needsLoad = false;
4015     EVT ObjectVT = Ins[ArgNo].VT;
4016     EVT OrigVT = Ins[ArgNo].ArgVT;
4017     unsigned ObjSize = ObjectVT.getStoreSize();
4018     unsigned ArgSize = ObjSize;
4019     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4020     if (Ins[ArgNo].isOrigArg()) {
4021       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4022       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4023     }
4024     // We re-align the argument offset for each argument, except when using the
4025     // fast calling convention, when we need to make sure we do that only when
4026     // we'll actually use a stack slot.
4027     unsigned CurArgOffset;
4028     Align Alignment;
4029     auto ComputeArgOffset = [&]() {
4030       /* Respect alignment of argument on the stack.  */
4031       Alignment =
4032           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4033       ArgOffset = alignTo(ArgOffset, Alignment);
4034       CurArgOffset = ArgOffset;
4035     };
4036 
4037     if (CallConv != CallingConv::Fast) {
4038       ComputeArgOffset();
4039 
4040       /* Compute GPR index associated with argument offset.  */
4041       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4042       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4043     }
4044 
4045     // FIXME the codegen can be much improved in some cases.
4046     // We do not have to keep everything in memory.
4047     if (Flags.isByVal()) {
4048       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4049 
4050       if (CallConv == CallingConv::Fast)
4051         ComputeArgOffset();
4052 
4053       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4054       ObjSize = Flags.getByValSize();
4055       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4056       // Empty aggregate parameters do not take up registers.  Examples:
4057       //   struct { } a;
4058       //   union  { } b;
4059       //   int c[0];
4060       // etc.  However, we have to provide a place-holder in InVals, so
4061       // pretend we have an 8-byte item at the current address for that
4062       // purpose.
4063       if (!ObjSize) {
4064         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4065         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4066         InVals.push_back(FIN);
4067         continue;
4068       }
4069 
4070       // Create a stack object covering all stack doublewords occupied
4071       // by the argument.  If the argument is (fully or partially) on
4072       // the stack, or if the argument is fully in registers but the
4073       // caller has allocated the parameter save anyway, we can refer
4074       // directly to the caller's stack frame.  Otherwise, create a
4075       // local copy in our own frame.
4076       int FI;
4077       if (HasParameterArea ||
4078           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4079         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4080       else
4081         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4082       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4083 
4084       // Handle aggregates smaller than 8 bytes.
4085       if (ObjSize < PtrByteSize) {
4086         // The value of the object is its address, which differs from the
4087         // address of the enclosing doubleword on big-endian systems.
4088         SDValue Arg = FIN;
4089         if (!isLittleEndian) {
4090           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4091           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4092         }
4093         InVals.push_back(Arg);
4094 
4095         if (GPR_idx != Num_GPR_Regs) {
4096           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4097           FuncInfo->addLiveInAttr(VReg, Flags);
4098           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4099           SDValue Store;
4100 
4101           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4102             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4103                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4104             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4105                                       MachinePointerInfo(&*FuncArg), ObjType);
4106           } else {
4107             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4108             // store the whole register as-is to the parameter save area
4109             // slot.
4110             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4111                                  MachinePointerInfo(&*FuncArg));
4112           }
4113 
4114           MemOps.push_back(Store);
4115         }
4116         // Whether we copied from a register or not, advance the offset
4117         // into the parameter save area by a full doubleword.
4118         ArgOffset += PtrByteSize;
4119         continue;
4120       }
4121 
4122       // The value of the object is its address, which is the address of
4123       // its first stack doubleword.
4124       InVals.push_back(FIN);
4125 
4126       // Store whatever pieces of the object are in registers to memory.
4127       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4128         if (GPR_idx == Num_GPR_Regs)
4129           break;
4130 
4131         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4132         FuncInfo->addLiveInAttr(VReg, Flags);
4133         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4134         SDValue Addr = FIN;
4135         if (j) {
4136           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4137           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4138         }
4139         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4140                                      MachinePointerInfo(&*FuncArg, j));
4141         MemOps.push_back(Store);
4142         ++GPR_idx;
4143       }
4144       ArgOffset += ArgSize;
4145       continue;
4146     }
4147 
4148     switch (ObjectVT.getSimpleVT().SimpleTy) {
4149     default: llvm_unreachable("Unhandled argument type!");
4150     case MVT::i1:
4151     case MVT::i32:
4152     case MVT::i64:
4153       if (Flags.isNest()) {
4154         // The 'nest' parameter, if any, is passed in R11.
4155         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4156         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4157 
4158         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4159           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4160 
4161         break;
4162       }
4163 
4164       // These can be scalar arguments or elements of an integer array type
4165       // passed directly.  Clang may use those instead of "byval" aggregate
4166       // types to avoid forcing arguments to memory unnecessarily.
4167       if (GPR_idx != Num_GPR_Regs) {
4168         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4169         FuncInfo->addLiveInAttr(VReg, Flags);
4170         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4171 
4172         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4173           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4174           // value to MVT::i64 and then truncate to the correct register size.
4175           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4176       } else {
4177         if (CallConv == CallingConv::Fast)
4178           ComputeArgOffset();
4179 
4180         needsLoad = true;
4181         ArgSize = PtrByteSize;
4182       }
4183       if (CallConv != CallingConv::Fast || needsLoad)
4184         ArgOffset += 8;
4185       break;
4186 
4187     case MVT::f32:
4188     case MVT::f64:
4189       // These can be scalar arguments or elements of a float array type
4190       // passed directly.  The latter are used to implement ELFv2 homogenous
4191       // float aggregates.
4192       if (FPR_idx != Num_FPR_Regs) {
4193         unsigned VReg;
4194 
4195         if (ObjectVT == MVT::f32)
4196           VReg = MF.addLiveIn(FPR[FPR_idx],
4197                               Subtarget.hasP8Vector()
4198                                   ? &PPC::VSSRCRegClass
4199                                   : &PPC::F4RCRegClass);
4200         else
4201           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4202                                                 ? &PPC::VSFRCRegClass
4203                                                 : &PPC::F8RCRegClass);
4204 
4205         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4206         ++FPR_idx;
4207       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4208         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4209         // once we support fp <-> gpr moves.
4210 
4211         // This can only ever happen in the presence of f32 array types,
4212         // since otherwise we never run out of FPRs before running out
4213         // of GPRs.
4214         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4215         FuncInfo->addLiveInAttr(VReg, Flags);
4216         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4217 
4218         if (ObjectVT == MVT::f32) {
4219           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4220             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4221                                  DAG.getConstant(32, dl, MVT::i32));
4222           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4223         }
4224 
4225         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4226       } else {
4227         if (CallConv == CallingConv::Fast)
4228           ComputeArgOffset();
4229 
4230         needsLoad = true;
4231       }
4232 
4233       // When passing an array of floats, the array occupies consecutive
4234       // space in the argument area; only round up to the next doubleword
4235       // at the end of the array.  Otherwise, each float takes 8 bytes.
4236       if (CallConv != CallingConv::Fast || needsLoad) {
4237         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4238         ArgOffset += ArgSize;
4239         if (Flags.isInConsecutiveRegsLast())
4240           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4241       }
4242       break;
4243     case MVT::v4f32:
4244     case MVT::v4i32:
4245     case MVT::v8i16:
4246     case MVT::v16i8:
4247     case MVT::v2f64:
4248     case MVT::v2i64:
4249     case MVT::v1i128:
4250     case MVT::f128:
4251       // These can be scalar arguments or elements of a vector array type
4252       // passed directly.  The latter are used to implement ELFv2 homogenous
4253       // vector aggregates.
4254       if (VR_idx != Num_VR_Regs) {
4255         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4256         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4257         ++VR_idx;
4258       } else {
4259         if (CallConv == CallingConv::Fast)
4260           ComputeArgOffset();
4261         needsLoad = true;
4262       }
4263       if (CallConv != CallingConv::Fast || needsLoad)
4264         ArgOffset += 16;
4265       break;
4266     }
4267 
4268     // We need to load the argument to a virtual register if we determined
4269     // above that we ran out of physical registers of the appropriate type.
4270     if (needsLoad) {
4271       if (ObjSize < ArgSize && !isLittleEndian)
4272         CurArgOffset += ArgSize - ObjSize;
4273       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4274       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4275       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4276     }
4277 
4278     InVals.push_back(ArgVal);
4279   }
4280 
4281   // Area that is at least reserved in the caller of this function.
4282   unsigned MinReservedArea;
4283   if (HasParameterArea)
4284     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4285   else
4286     MinReservedArea = LinkageSize;
4287 
4288   // Set the size that is at least reserved in caller of this function.  Tail
4289   // call optimized functions' reserved stack space needs to be aligned so that
4290   // taking the difference between two stack areas will result in an aligned
4291   // stack.
4292   MinReservedArea =
4293       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4294   FuncInfo->setMinReservedArea(MinReservedArea);
4295 
4296   // If the function takes variable number of arguments, make a frame index for
4297   // the start of the first vararg value... for expansion of llvm.va_start.
4298   // On ELFv2ABI spec, it writes:
4299   // C programs that are intended to be *portable* across different compilers
4300   // and architectures must use the header file <stdarg.h> to deal with variable
4301   // argument lists.
4302   if (isVarArg && MFI.hasVAStart()) {
4303     int Depth = ArgOffset;
4304 
4305     FuncInfo->setVarArgsFrameIndex(
4306       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4307     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4308 
4309     // If this function is vararg, store any remaining integer argument regs
4310     // to their spots on the stack so that they may be loaded by dereferencing
4311     // the result of va_next.
4312     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4313          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4314       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4315       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4316       SDValue Store =
4317           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4318       MemOps.push_back(Store);
4319       // Increment the address by four for the next argument to store
4320       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4321       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4322     }
4323   }
4324 
4325   if (!MemOps.empty())
4326     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4327 
4328   return Chain;
4329 }
4330 
4331 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4332     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4333     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4334     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4335   // TODO: add description of PPC stack frame format, or at least some docs.
4336   //
4337   MachineFunction &MF = DAG.getMachineFunction();
4338   MachineFrameInfo &MFI = MF.getFrameInfo();
4339   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4340 
4341   EVT PtrVT = getPointerTy(MF.getDataLayout());
4342   bool isPPC64 = PtrVT == MVT::i64;
4343   // Potential tail calls could cause overwriting of argument stack slots.
4344   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4345                        (CallConv == CallingConv::Fast));
4346   unsigned PtrByteSize = isPPC64 ? 8 : 4;
4347   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4348   unsigned ArgOffset = LinkageSize;
4349   // Area that is at least reserved in caller of this function.
4350   unsigned MinReservedArea = ArgOffset;
4351 
4352   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
4353     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4354     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4355   };
4356   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
4357     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4358     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4359   };
4360   static const MCPhysReg VR[] = {
4361     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4362     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4363   };
4364 
4365   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4366   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4367   const unsigned Num_VR_Regs  = array_lengthof( VR);
4368 
4369   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4370 
4371   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4372 
4373   // In 32-bit non-varargs functions, the stack space for vectors is after the
4374   // stack space for non-vectors.  We do not use this space unless we have
4375   // too many vectors to fit in registers, something that only occurs in
4376   // constructed examples:), but we have to walk the arglist to figure
4377   // that out...for the pathological case, compute VecArgOffset as the
4378   // start of the vector parameter area.  Computing VecArgOffset is the
4379   // entire point of the following loop.
4380   unsigned VecArgOffset = ArgOffset;
4381   if (!isVarArg && !isPPC64) {
4382     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4383          ++ArgNo) {
4384       EVT ObjectVT = Ins[ArgNo].VT;
4385       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4386 
4387       if (Flags.isByVal()) {
4388         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4389         unsigned ObjSize = Flags.getByValSize();
4390         unsigned ArgSize =
4391                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4392         VecArgOffset += ArgSize;
4393         continue;
4394       }
4395 
4396       switch(ObjectVT.getSimpleVT().SimpleTy) {
4397       default: llvm_unreachable("Unhandled argument type!");
4398       case MVT::i1:
4399       case MVT::i32:
4400       case MVT::f32:
4401         VecArgOffset += 4;
4402         break;
4403       case MVT::i64:  // PPC64
4404       case MVT::f64:
4405         // FIXME: We are guaranteed to be !isPPC64 at this point.
4406         // Does MVT::i64 apply?
4407         VecArgOffset += 8;
4408         break;
4409       case MVT::v4f32:
4410       case MVT::v4i32:
4411       case MVT::v8i16:
4412       case MVT::v16i8:
4413         // Nothing to do, we're only looking at Nonvector args here.
4414         break;
4415       }
4416     }
4417   }
4418   // We've found where the vector parameter area in memory is.  Skip the
4419   // first 12 parameters; these don't use that memory.
4420   VecArgOffset = ((VecArgOffset+15)/16)*16;
4421   VecArgOffset += 12*16;
4422 
4423   // Add DAG nodes to load the arguments or copy them out of registers.  On
4424   // entry to a function on PPC, the arguments start after the linkage area,
4425   // although the first ones are often in registers.
4426 
4427   SmallVector<SDValue, 8> MemOps;
4428   unsigned nAltivecParamsAtEnd = 0;
4429   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4430   unsigned CurArgIdx = 0;
4431   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4432     SDValue ArgVal;
4433     bool needsLoad = false;
4434     EVT ObjectVT = Ins[ArgNo].VT;
4435     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4436     unsigned ArgSize = ObjSize;
4437     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4438     if (Ins[ArgNo].isOrigArg()) {
4439       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4440       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4441     }
4442     unsigned CurArgOffset = ArgOffset;
4443 
4444     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4445     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4446         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4447       if (isVarArg || isPPC64) {
4448         MinReservedArea = ((MinReservedArea+15)/16)*16;
4449         MinReservedArea += CalculateStackSlotSize(ObjectVT,
4450                                                   Flags,
4451                                                   PtrByteSize);
4452       } else  nAltivecParamsAtEnd++;
4453     } else
4454       // Calculate min reserved area.
4455       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4456                                                 Flags,
4457                                                 PtrByteSize);
4458 
4459     // FIXME the codegen can be much improved in some cases.
4460     // We do not have to keep everything in memory.
4461     if (Flags.isByVal()) {
4462       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4463 
4464       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4465       ObjSize = Flags.getByValSize();
4466       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4467       // Objects of size 1 and 2 are right justified, everything else is
4468       // left justified.  This means the memory address is adjusted forwards.
4469       if (ObjSize==1 || ObjSize==2) {
4470         CurArgOffset = CurArgOffset + (4 - ObjSize);
4471       }
4472       // The value of the object is its address.
4473       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4474       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4475       InVals.push_back(FIN);
4476       if (ObjSize==1 || ObjSize==2) {
4477         if (GPR_idx != Num_GPR_Regs) {
4478           unsigned VReg;
4479           if (isPPC64)
4480             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4481           else
4482             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4483           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4484           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4485           SDValue Store =
4486               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4487                                 MachinePointerInfo(&*FuncArg), ObjType);
4488           MemOps.push_back(Store);
4489           ++GPR_idx;
4490         }
4491 
4492         ArgOffset += PtrByteSize;
4493 
4494         continue;
4495       }
4496       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4497         // Store whatever pieces of the object are in registers
4498         // to memory.  ArgOffset will be the address of the beginning
4499         // of the object.
4500         if (GPR_idx != Num_GPR_Regs) {
4501           unsigned VReg;
4502           if (isPPC64)
4503             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4504           else
4505             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4506           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4507           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4508           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4509           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4510                                        MachinePointerInfo(&*FuncArg, j));
4511           MemOps.push_back(Store);
4512           ++GPR_idx;
4513           ArgOffset += PtrByteSize;
4514         } else {
4515           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4516           break;
4517         }
4518       }
4519       continue;
4520     }
4521 
4522     switch (ObjectVT.getSimpleVT().SimpleTy) {
4523     default: llvm_unreachable("Unhandled argument type!");
4524     case MVT::i1:
4525     case MVT::i32:
4526       if (!isPPC64) {
4527         if (GPR_idx != Num_GPR_Regs) {
4528           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4529           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4530 
4531           if (ObjectVT == MVT::i1)
4532             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4533 
4534           ++GPR_idx;
4535         } else {
4536           needsLoad = true;
4537           ArgSize = PtrByteSize;
4538         }
4539         // All int arguments reserve stack space in the Darwin ABI.
4540         ArgOffset += PtrByteSize;
4541         break;
4542       }
4543       LLVM_FALLTHROUGH;
4544     case MVT::i64:  // PPC64
4545       if (GPR_idx != Num_GPR_Regs) {
4546         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4547         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4548 
4549         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4550           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4551           // value to MVT::i64 and then truncate to the correct register size.
4552           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4553 
4554         ++GPR_idx;
4555       } else {
4556         needsLoad = true;
4557         ArgSize = PtrByteSize;
4558       }
4559       // All int arguments reserve stack space in the Darwin ABI.
4560       ArgOffset += 8;
4561       break;
4562 
4563     case MVT::f32:
4564     case MVT::f64:
4565       // Every 4 bytes of argument space consumes one of the GPRs available for
4566       // argument passing.
4567       if (GPR_idx != Num_GPR_Regs) {
4568         ++GPR_idx;
4569         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4570           ++GPR_idx;
4571       }
4572       if (FPR_idx != Num_FPR_Regs) {
4573         unsigned VReg;
4574 
4575         if (ObjectVT == MVT::f32)
4576           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4577         else
4578           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4579 
4580         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4581         ++FPR_idx;
4582       } else {
4583         needsLoad = true;
4584       }
4585 
4586       // All FP arguments reserve stack space in the Darwin ABI.
4587       ArgOffset += isPPC64 ? 8 : ObjSize;
4588       break;
4589     case MVT::v4f32:
4590     case MVT::v4i32:
4591     case MVT::v8i16:
4592     case MVT::v16i8:
4593       // Note that vector arguments in registers don't reserve stack space,
4594       // except in varargs functions.
4595       if (VR_idx != Num_VR_Regs) {
4596         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4597         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4598         if (isVarArg) {
4599           while ((ArgOffset % 16) != 0) {
4600             ArgOffset += PtrByteSize;
4601             if (GPR_idx != Num_GPR_Regs)
4602               GPR_idx++;
4603           }
4604           ArgOffset += 16;
4605           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4606         }
4607         ++VR_idx;
4608       } else {
4609         if (!isVarArg && !isPPC64) {
4610           // Vectors go after all the nonvectors.
4611           CurArgOffset = VecArgOffset;
4612           VecArgOffset += 16;
4613         } else {
4614           // Vectors are aligned.
4615           ArgOffset = ((ArgOffset+15)/16)*16;
4616           CurArgOffset = ArgOffset;
4617           ArgOffset += 16;
4618         }
4619         needsLoad = true;
4620       }
4621       break;
4622     }
4623 
4624     // We need to load the argument to a virtual register if we determined above
4625     // that we ran out of physical registers of the appropriate type.
4626     if (needsLoad) {
4627       int FI = MFI.CreateFixedObject(ObjSize,
4628                                      CurArgOffset + (ArgSize - ObjSize),
4629                                      isImmutable);
4630       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4631       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4632     }
4633 
4634     InVals.push_back(ArgVal);
4635   }
4636 
4637   // Allow for Altivec parameters at the end, if needed.
4638   if (nAltivecParamsAtEnd) {
4639     MinReservedArea = ((MinReservedArea+15)/16)*16;
4640     MinReservedArea += 16*nAltivecParamsAtEnd;
4641   }
4642 
4643   // Area that is at least reserved in the caller of this function.
4644   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4645 
4646   // Set the size that is at least reserved in caller of this function.  Tail
4647   // call optimized functions' reserved stack space needs to be aligned so that
4648   // taking the difference between two stack areas will result in an aligned
4649   // stack.
4650   MinReservedArea =
4651       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4652   FuncInfo->setMinReservedArea(MinReservedArea);
4653 
4654   // If the function takes variable number of arguments, make a frame index for
4655   // the start of the first vararg value... for expansion of llvm.va_start.
4656   if (isVarArg) {
4657     int Depth = ArgOffset;
4658 
4659     FuncInfo->setVarArgsFrameIndex(
4660       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4661                             Depth, true));
4662     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4663 
4664     // If this function is vararg, store any remaining integer argument regs
4665     // to their spots on the stack so that they may be loaded by dereferencing
4666     // the result of va_next.
4667     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4668       unsigned VReg;
4669 
4670       if (isPPC64)
4671         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4672       else
4673         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4674 
4675       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4676       SDValue Store =
4677           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4678       MemOps.push_back(Store);
4679       // Increment the address by four for the next argument to store
4680       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4681       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4682     }
4683   }
4684 
4685   if (!MemOps.empty())
4686     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4687 
4688   return Chain;
4689 }
4690 
4691 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4692 /// adjusted to accommodate the arguments for the tailcall.
4693 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4694                                    unsigned ParamSize) {
4695 
4696   if (!isTailCall) return 0;
4697 
4698   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4699   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4700   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4701   // Remember only if the new adjustment is bigger.
4702   if (SPDiff < FI->getTailCallSPDelta())
4703     FI->setTailCallSPDelta(SPDiff);
4704 
4705   return SPDiff;
4706 }
4707 
4708 static bool isFunctionGlobalAddress(SDValue Callee);
4709 
4710 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4711                               const TargetMachine &TM) {
4712   // It does not make sense to call callsShareTOCBase() with a caller that
4713   // is PC Relative since PC Relative callers do not have a TOC.
4714 #ifndef NDEBUG
4715   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4716   assert(!STICaller->isUsingPCRelativeCalls() &&
4717          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4718 #endif
4719 
4720   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4721   // don't have enough information to determine if the caller and callee share
4722   // the same  TOC base, so we have to pessimistically assume they don't for
4723   // correctness.
4724   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4725   if (!G)
4726     return false;
4727 
4728   const GlobalValue *GV = G->getGlobal();
4729 
4730   // If the callee is preemptable, then the static linker will use a plt-stub
4731   // which saves the toc to the stack, and needs a nop after the call
4732   // instruction to convert to a toc-restore.
4733   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4734     return false;
4735 
4736   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4737   // We may need a TOC restore in the situation where the caller requires a
4738   // valid TOC but the callee is PC Relative and does not.
4739   const Function *F = dyn_cast<Function>(GV);
4740   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4741 
4742   // If we have an Alias we can try to get the function from there.
4743   if (Alias) {
4744     const GlobalObject *GlobalObj = Alias->getBaseObject();
4745     F = dyn_cast<Function>(GlobalObj);
4746   }
4747 
4748   // If we still have no valid function pointer we do not have enough
4749   // information to determine if the callee uses PC Relative calls so we must
4750   // assume that it does.
4751   if (!F)
4752     return false;
4753 
4754   // If the callee uses PC Relative we cannot guarantee that the callee won't
4755   // clobber the TOC of the caller and so we must assume that the two
4756   // functions do not share a TOC base.
4757   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4758   if (STICallee->isUsingPCRelativeCalls())
4759     return false;
4760 
4761   // The medium and large code models are expected to provide a sufficiently
4762   // large TOC to provide all data addressing needs of a module with a
4763   // single TOC.
4764   if (CodeModel::Medium == TM.getCodeModel() ||
4765       CodeModel::Large == TM.getCodeModel())
4766     return true;
4767 
4768   // Otherwise we need to ensure callee and caller are in the same section,
4769   // since the linker may allocate multiple TOCs, and we don't know which
4770   // sections will belong to the same TOC base.
4771   if (!GV->isStrongDefinitionForLinker())
4772     return false;
4773 
4774   // Any explicitly-specified sections and section prefixes must also match.
4775   // Also, if we're using -ffunction-sections, then each function is always in
4776   // a different section (the same is true for COMDAT functions).
4777   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4778       GV->getSection() != Caller->getSection())
4779     return false;
4780   if (const auto *F = dyn_cast<Function>(GV)) {
4781     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4782       return false;
4783   }
4784 
4785   return true;
4786 }
4787 
4788 static bool
4789 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4790                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4791   assert(Subtarget.is64BitELFABI());
4792 
4793   const unsigned PtrByteSize = 8;
4794   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4795 
4796   static const MCPhysReg GPR[] = {
4797     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4798     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4799   };
4800   static const MCPhysReg VR[] = {
4801     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4802     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4803   };
4804 
4805   const unsigned NumGPRs = array_lengthof(GPR);
4806   const unsigned NumFPRs = 13;
4807   const unsigned NumVRs = array_lengthof(VR);
4808   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4809 
4810   unsigned NumBytes = LinkageSize;
4811   unsigned AvailableFPRs = NumFPRs;
4812   unsigned AvailableVRs = NumVRs;
4813 
4814   for (const ISD::OutputArg& Param : Outs) {
4815     if (Param.Flags.isNest()) continue;
4816 
4817     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4818                                LinkageSize, ParamAreaSize, NumBytes,
4819                                AvailableFPRs, AvailableVRs))
4820       return true;
4821   }
4822   return false;
4823 }
4824 
4825 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4826   if (CB.arg_size() != CallerFn->arg_size())
4827     return false;
4828 
4829   auto CalleeArgIter = CB.arg_begin();
4830   auto CalleeArgEnd = CB.arg_end();
4831   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4832 
4833   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4834     const Value* CalleeArg = *CalleeArgIter;
4835     const Value* CallerArg = &(*CallerArgIter);
4836     if (CalleeArg == CallerArg)
4837       continue;
4838 
4839     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4840     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4841     //      }
4842     // 1st argument of callee is undef and has the same type as caller.
4843     if (CalleeArg->getType() == CallerArg->getType() &&
4844         isa<UndefValue>(CalleeArg))
4845       continue;
4846 
4847     return false;
4848   }
4849 
4850   return true;
4851 }
4852 
4853 // Returns true if TCO is possible between the callers and callees
4854 // calling conventions.
4855 static bool
4856 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4857                                     CallingConv::ID CalleeCC) {
4858   // Tail calls are possible with fastcc and ccc.
4859   auto isTailCallableCC  = [] (CallingConv::ID CC){
4860       return  CC == CallingConv::C || CC == CallingConv::Fast;
4861   };
4862   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4863     return false;
4864 
4865   // We can safely tail call both fastcc and ccc callees from a c calling
4866   // convention caller. If the caller is fastcc, we may have less stack space
4867   // than a non-fastcc caller with the same signature so disable tail-calls in
4868   // that case.
4869   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4870 }
4871 
4872 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4873     SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4874     const SmallVectorImpl<ISD::OutputArg> &Outs,
4875     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4876   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4877 
4878   if (DisableSCO && !TailCallOpt) return false;
4879 
4880   // Variadic argument functions are not supported.
4881   if (isVarArg) return false;
4882 
4883   auto &Caller = DAG.getMachineFunction().getFunction();
4884   // Check that the calling conventions are compatible for tco.
4885   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4886     return false;
4887 
4888   // Caller contains any byval parameter is not supported.
4889   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4890     return false;
4891 
4892   // Callee contains any byval parameter is not supported, too.
4893   // Note: This is a quick work around, because in some cases, e.g.
4894   // caller's stack size > callee's stack size, we are still able to apply
4895   // sibling call optimization. For example, gcc is able to do SCO for caller1
4896   // in the following example, but not for caller2.
4897   //   struct test {
4898   //     long int a;
4899   //     char ary[56];
4900   //   } gTest;
4901   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4902   //     b->a = v.a;
4903   //     return 0;
4904   //   }
4905   //   void caller1(struct test a, struct test c, struct test *b) {
4906   //     callee(gTest, b); }
4907   //   void caller2(struct test *b) { callee(gTest, b); }
4908   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4909     return false;
4910 
4911   // If callee and caller use different calling conventions, we cannot pass
4912   // parameters on stack since offsets for the parameter area may be different.
4913   if (Caller.getCallingConv() != CalleeCC &&
4914       needStackSlotPassParameters(Subtarget, Outs))
4915     return false;
4916 
4917   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4918   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4919   // callee potentially have different TOC bases then we cannot tail call since
4920   // we need to restore the TOC pointer after the call.
4921   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4922   // We cannot guarantee this for indirect calls or calls to external functions.
4923   // When PC-Relative addressing is used, the concept of the TOC is no longer
4924   // applicable so this check is not required.
4925   // Check first for indirect calls.
4926   if (!Subtarget.isUsingPCRelativeCalls() &&
4927       !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4928     return false;
4929 
4930   // Check if we share the TOC base.
4931   if (!Subtarget.isUsingPCRelativeCalls() &&
4932       !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4933     return false;
4934 
4935   // TCO allows altering callee ABI, so we don't have to check further.
4936   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4937     return true;
4938 
4939   if (DisableSCO) return false;
4940 
4941   // If callee use the same argument list that caller is using, then we can
4942   // apply SCO on this case. If it is not, then we need to check if callee needs
4943   // stack for passing arguments.
4944   // PC Relative tail calls may not have a CallBase.
4945   // If there is no CallBase we cannot verify if we have the same argument
4946   // list so assume that we don't have the same argument list.
4947   if (CB && !hasSameArgumentList(&Caller, *CB) &&
4948       needStackSlotPassParameters(Subtarget, Outs))
4949     return false;
4950   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4951     return false;
4952 
4953   return true;
4954 }
4955 
4956 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4957 /// for tail call optimization. Targets which want to do tail call
4958 /// optimization should implement this function.
4959 bool
4960 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4961                                                      CallingConv::ID CalleeCC,
4962                                                      bool isVarArg,
4963                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4964                                                      SelectionDAG& DAG) const {
4965   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4966     return false;
4967 
4968   // Variable argument functions are not supported.
4969   if (isVarArg)
4970     return false;
4971 
4972   MachineFunction &MF = DAG.getMachineFunction();
4973   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4974   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4975     // Functions containing by val parameters are not supported.
4976     for (unsigned i = 0; i != Ins.size(); i++) {
4977        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4978        if (Flags.isByVal()) return false;
4979     }
4980 
4981     // Non-PIC/GOT tail calls are supported.
4982     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4983       return true;
4984 
4985     // At the moment we can only do local tail calls (in same module, hidden
4986     // or protected) if we are generating PIC.
4987     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4988       return G->getGlobal()->hasHiddenVisibility()
4989           || G->getGlobal()->hasProtectedVisibility();
4990   }
4991 
4992   return false;
4993 }
4994 
4995 /// isCallCompatibleAddress - Return the immediate to use if the specified
4996 /// 32-bit value is representable in the immediate field of a BxA instruction.
4997 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4998   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4999   if (!C) return nullptr;
5000 
5001   int Addr = C->getZExtValue();
5002   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
5003       SignExtend32<26>(Addr) != Addr)
5004     return nullptr;  // Top 6 bits have to be sext of immediate.
5005 
5006   return DAG
5007       .getConstant(
5008           (int)C->getZExtValue() >> 2, SDLoc(Op),
5009           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5010       .getNode();
5011 }
5012 
5013 namespace {
5014 
5015 struct TailCallArgumentInfo {
5016   SDValue Arg;
5017   SDValue FrameIdxOp;
5018   int FrameIdx = 0;
5019 
5020   TailCallArgumentInfo() = default;
5021 };
5022 
5023 } // end anonymous namespace
5024 
5025 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5026 static void StoreTailCallArgumentsToStackSlot(
5027     SelectionDAG &DAG, SDValue Chain,
5028     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5029     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5030   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5031     SDValue Arg = TailCallArgs[i].Arg;
5032     SDValue FIN = TailCallArgs[i].FrameIdxOp;
5033     int FI = TailCallArgs[i].FrameIdx;
5034     // Store relative to framepointer.
5035     MemOpChains.push_back(DAG.getStore(
5036         Chain, dl, Arg, FIN,
5037         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5038   }
5039 }
5040 
5041 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5042 /// the appropriate stack slot for the tail call optimized function call.
5043 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5044                                              SDValue OldRetAddr, SDValue OldFP,
5045                                              int SPDiff, const SDLoc &dl) {
5046   if (SPDiff) {
5047     // Calculate the new stack slot for the return address.
5048     MachineFunction &MF = DAG.getMachineFunction();
5049     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5050     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5051     bool isPPC64 = Subtarget.isPPC64();
5052     int SlotSize = isPPC64 ? 8 : 4;
5053     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5054     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5055                                                          NewRetAddrLoc, true);
5056     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5057     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5058     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5059                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5060   }
5061   return Chain;
5062 }
5063 
5064 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5065 /// the position of the argument.
5066 static void
5067 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5068                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5069                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5070   int Offset = ArgOffset + SPDiff;
5071   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5072   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5073   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5074   SDValue FIN = DAG.getFrameIndex(FI, VT);
5075   TailCallArgumentInfo Info;
5076   Info.Arg = Arg;
5077   Info.FrameIdxOp = FIN;
5078   Info.FrameIdx = FI;
5079   TailCallArguments.push_back(Info);
5080 }
5081 
5082 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5083 /// stack slot. Returns the chain as result and the loaded frame pointers in
5084 /// LROpOut/FPOpout. Used when tail calling.
5085 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5086     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5087     SDValue &FPOpOut, const SDLoc &dl) const {
5088   if (SPDiff) {
5089     // Load the LR and FP stack slot for later adjusting.
5090     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5091     LROpOut = getReturnAddrFrameIndex(DAG);
5092     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5093     Chain = SDValue(LROpOut.getNode(), 1);
5094   }
5095   return Chain;
5096 }
5097 
5098 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5099 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5100 /// specified by the specific parameter attribute. The copy will be passed as
5101 /// a byval function parameter.
5102 /// Sometimes what we are copying is the end of a larger object, the part that
5103 /// does not fit in registers.
5104 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5105                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5106                                          SelectionDAG &DAG, const SDLoc &dl) {
5107   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5108   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5109                        Flags.getNonZeroByValAlign(), false, false, false,
5110                        MachinePointerInfo(), MachinePointerInfo());
5111 }
5112 
5113 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5114 /// tail calls.
5115 static void LowerMemOpCallTo(
5116     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5117     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5118     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5119     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5120   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5121   if (!isTailCall) {
5122     if (isVector) {
5123       SDValue StackPtr;
5124       if (isPPC64)
5125         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5126       else
5127         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5128       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5129                            DAG.getConstant(ArgOffset, dl, PtrVT));
5130     }
5131     MemOpChains.push_back(
5132         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5133     // Calculate and remember argument location.
5134   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5135                                   TailCallArguments);
5136 }
5137 
5138 static void
5139 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5140                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5141                 SDValue FPOp,
5142                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5143   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5144   // might overwrite each other in case of tail call optimization.
5145   SmallVector<SDValue, 8> MemOpChains2;
5146   // Do not flag preceding copytoreg stuff together with the following stuff.
5147   InFlag = SDValue();
5148   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5149                                     MemOpChains2, dl);
5150   if (!MemOpChains2.empty())
5151     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5152 
5153   // Store the return address to the appropriate stack slot.
5154   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5155 
5156   // Emit callseq_end just before tailcall node.
5157   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5158                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5159   InFlag = Chain.getValue(1);
5160 }
5161 
5162 // Is this global address that of a function that can be called by name? (as
5163 // opposed to something that must hold a descriptor for an indirect call).
5164 static bool isFunctionGlobalAddress(SDValue Callee) {
5165   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5166     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5167         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5168       return false;
5169 
5170     return G->getGlobal()->getValueType()->isFunctionTy();
5171   }
5172 
5173   return false;
5174 }
5175 
5176 SDValue PPCTargetLowering::LowerCallResult(
5177     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5178     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5179     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5180   SmallVector<CCValAssign, 16> RVLocs;
5181   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5182                     *DAG.getContext());
5183 
5184   CCRetInfo.AnalyzeCallResult(
5185       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5186                ? RetCC_PPC_Cold
5187                : RetCC_PPC);
5188 
5189   // Copy all of the result registers out of their specified physreg.
5190   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5191     CCValAssign &VA = RVLocs[i];
5192     assert(VA.isRegLoc() && "Can only return in registers!");
5193 
5194     SDValue Val;
5195 
5196     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5197       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5198                                       InFlag);
5199       Chain = Lo.getValue(1);
5200       InFlag = Lo.getValue(2);
5201       VA = RVLocs[++i]; // skip ahead to next loc
5202       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5203                                       InFlag);
5204       Chain = Hi.getValue(1);
5205       InFlag = Hi.getValue(2);
5206       if (!Subtarget.isLittleEndian())
5207         std::swap (Lo, Hi);
5208       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5209     } else {
5210       Val = DAG.getCopyFromReg(Chain, dl,
5211                                VA.getLocReg(), VA.getLocVT(), InFlag);
5212       Chain = Val.getValue(1);
5213       InFlag = Val.getValue(2);
5214     }
5215 
5216     switch (VA.getLocInfo()) {
5217     default: llvm_unreachable("Unknown loc info!");
5218     case CCValAssign::Full: break;
5219     case CCValAssign::AExt:
5220       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5221       break;
5222     case CCValAssign::ZExt:
5223       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5224                         DAG.getValueType(VA.getValVT()));
5225       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5226       break;
5227     case CCValAssign::SExt:
5228       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5229                         DAG.getValueType(VA.getValVT()));
5230       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5231       break;
5232     }
5233 
5234     InVals.push_back(Val);
5235   }
5236 
5237   return Chain;
5238 }
5239 
5240 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5241                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5242   // PatchPoint calls are not indirect.
5243   if (isPatchPoint)
5244     return false;
5245 
5246   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5247     return false;
5248 
5249   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5250   // becuase the immediate function pointer points to a descriptor instead of
5251   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5252   // pointer immediate points to the global entry point, while the BLA would
5253   // need to jump to the local entry point (see rL211174).
5254   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5255       isBLACompatibleAddress(Callee, DAG))
5256     return false;
5257 
5258   return true;
5259 }
5260 
5261 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5262 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5263   return Subtarget.isAIXABI() ||
5264          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5265 }
5266 
5267 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5268                               const Function &Caller,
5269                               const SDValue &Callee,
5270                               const PPCSubtarget &Subtarget,
5271                               const TargetMachine &TM) {
5272   if (CFlags.IsTailCall)
5273     return PPCISD::TC_RETURN;
5274 
5275   // This is a call through a function pointer.
5276   if (CFlags.IsIndirect) {
5277     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5278     // indirect calls. The save of the caller's TOC pointer to the stack will be
5279     // inserted into the DAG as part of call lowering. The restore of the TOC
5280     // pointer is modeled by using a pseudo instruction for the call opcode that
5281     // represents the 2 instruction sequence of an indirect branch and link,
5282     // immediately followed by a load of the TOC pointer from the the stack save
5283     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5284     // as it is not saved or used.
5285     return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5286                                                : PPCISD::BCTRL;
5287   }
5288 
5289   if (Subtarget.isUsingPCRelativeCalls()) {
5290     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5291     return PPCISD::CALL_NOTOC;
5292   }
5293 
5294   // The ABIs that maintain a TOC pointer accross calls need to have a nop
5295   // immediately following the call instruction if the caller and callee may
5296   // have different TOC bases. At link time if the linker determines the calls
5297   // may not share a TOC base, the call is redirected to a trampoline inserted
5298   // by the linker. The trampoline will (among other things) save the callers
5299   // TOC pointer at an ABI designated offset in the linkage area and the linker
5300   // will rewrite the nop to be a load of the TOC pointer from the linkage area
5301   // into gpr2.
5302   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5303     return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5304                                                   : PPCISD::CALL_NOP;
5305 
5306   return PPCISD::CALL;
5307 }
5308 
5309 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5310                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5311   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5312     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5313       return SDValue(Dest, 0);
5314 
5315   // Returns true if the callee is local, and false otherwise.
5316   auto isLocalCallee = [&]() {
5317     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5318     const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5319     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5320 
5321     return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5322            !dyn_cast_or_null<GlobalIFunc>(GV);
5323   };
5324 
5325   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5326   // a static relocation model causes some versions of GNU LD (2.17.50, at
5327   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5328   // built with secure-PLT.
5329   bool UsePlt =
5330       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5331       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5332 
5333   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5334     const TargetMachine &TM = Subtarget.getTargetMachine();
5335     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5336     MCSymbolXCOFF *S =
5337         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5338 
5339     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5340     return DAG.getMCSymbol(S, PtrVT);
5341   };
5342 
5343   if (isFunctionGlobalAddress(Callee)) {
5344     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5345 
5346     if (Subtarget.isAIXABI()) {
5347       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5348       return getAIXFuncEntryPointSymbolSDNode(GV);
5349     }
5350     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5351                                       UsePlt ? PPCII::MO_PLT : 0);
5352   }
5353 
5354   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5355     const char *SymName = S->getSymbol();
5356     if (Subtarget.isAIXABI()) {
5357       // If there exists a user-declared function whose name is the same as the
5358       // ExternalSymbol's, then we pick up the user-declared version.
5359       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5360       if (const Function *F =
5361               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5362         return getAIXFuncEntryPointSymbolSDNode(F);
5363 
5364       // On AIX, direct function calls reference the symbol for the function's
5365       // entry point, which is named by prepending a "." before the function's
5366       // C-linkage name. A Qualname is returned here because an external
5367       // function entry point is a csect with XTY_ER property.
5368       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5369         auto &Context = DAG.getMachineFunction().getMMI().getContext();
5370         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5371             (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5372             SectionKind::getMetadata());
5373         return Sec->getQualNameSymbol();
5374       };
5375 
5376       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5377     }
5378     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5379                                        UsePlt ? PPCII::MO_PLT : 0);
5380   }
5381 
5382   // No transformation needed.
5383   assert(Callee.getNode() && "What no callee?");
5384   return Callee;
5385 }
5386 
5387 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5388   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5389          "Expected a CALLSEQ_STARTSDNode.");
5390 
5391   // The last operand is the chain, except when the node has glue. If the node
5392   // has glue, then the last operand is the glue, and the chain is the second
5393   // last operand.
5394   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5395   if (LastValue.getValueType() != MVT::Glue)
5396     return LastValue;
5397 
5398   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5399 }
5400 
5401 // Creates the node that moves a functions address into the count register
5402 // to prepare for an indirect call instruction.
5403 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5404                                 SDValue &Glue, SDValue &Chain,
5405                                 const SDLoc &dl) {
5406   SDValue MTCTROps[] = {Chain, Callee, Glue};
5407   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5408   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5409                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5410   // The glue is the second value produced.
5411   Glue = Chain.getValue(1);
5412 }
5413 
5414 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5415                                           SDValue &Glue, SDValue &Chain,
5416                                           SDValue CallSeqStart,
5417                                           const CallBase *CB, const SDLoc &dl,
5418                                           bool hasNest,
5419                                           const PPCSubtarget &Subtarget) {
5420   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5421   // entry point, but to the function descriptor (the function entry point
5422   // address is part of the function descriptor though).
5423   // The function descriptor is a three doubleword structure with the
5424   // following fields: function entry point, TOC base address and
5425   // environment pointer.
5426   // Thus for a call through a function pointer, the following actions need
5427   // to be performed:
5428   //   1. Save the TOC of the caller in the TOC save area of its stack
5429   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5430   //   2. Load the address of the function entry point from the function
5431   //      descriptor.
5432   //   3. Load the TOC of the callee from the function descriptor into r2.
5433   //   4. Load the environment pointer from the function descriptor into
5434   //      r11.
5435   //   5. Branch to the function entry point address.
5436   //   6. On return of the callee, the TOC of the caller needs to be
5437   //      restored (this is done in FinishCall()).
5438   //
5439   // The loads are scheduled at the beginning of the call sequence, and the
5440   // register copies are flagged together to ensure that no other
5441   // operations can be scheduled in between. E.g. without flagging the
5442   // copies together, a TOC access in the caller could be scheduled between
5443   // the assignment of the callee TOC and the branch to the callee, which leads
5444   // to incorrect code.
5445 
5446   // Start by loading the function address from the descriptor.
5447   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5448   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5449                       ? (MachineMemOperand::MODereferenceable |
5450                          MachineMemOperand::MOInvariant)
5451                       : MachineMemOperand::MONone;
5452 
5453   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5454 
5455   // Registers used in building the DAG.
5456   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5457   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5458 
5459   // Offsets of descriptor members.
5460   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5461   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5462 
5463   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5464   const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5465 
5466   // One load for the functions entry point address.
5467   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5468                                     Alignment, MMOFlags);
5469 
5470   // One for loading the TOC anchor for the module that contains the called
5471   // function.
5472   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5473   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5474   SDValue TOCPtr =
5475       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5476                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5477 
5478   // One for loading the environment pointer.
5479   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5480   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5481   SDValue LoadEnvPtr =
5482       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5483                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5484 
5485 
5486   // Then copy the newly loaded TOC anchor to the TOC pointer.
5487   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5488   Chain = TOCVal.getValue(0);
5489   Glue = TOCVal.getValue(1);
5490 
5491   // If the function call has an explicit 'nest' parameter, it takes the
5492   // place of the environment pointer.
5493   assert((!hasNest || !Subtarget.isAIXABI()) &&
5494          "Nest parameter is not supported on AIX.");
5495   if (!hasNest) {
5496     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5497     Chain = EnvVal.getValue(0);
5498     Glue = EnvVal.getValue(1);
5499   }
5500 
5501   // The rest of the indirect call sequence is the same as the non-descriptor
5502   // DAG.
5503   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5504 }
5505 
5506 static void
5507 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5508                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5509                   SelectionDAG &DAG,
5510                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5511                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5512                   const PPCSubtarget &Subtarget) {
5513   const bool IsPPC64 = Subtarget.isPPC64();
5514   // MVT for a general purpose register.
5515   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5516 
5517   // First operand is always the chain.
5518   Ops.push_back(Chain);
5519 
5520   // If it's a direct call pass the callee as the second operand.
5521   if (!CFlags.IsIndirect)
5522     Ops.push_back(Callee);
5523   else {
5524     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5525 
5526     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5527     // on the stack (this would have been done in `LowerCall_64SVR4` or
5528     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5529     // represents both the indirect branch and a load that restores the TOC
5530     // pointer from the linkage area. The operand for the TOC restore is an add
5531     // of the TOC save offset to the stack pointer. This must be the second
5532     // operand: after the chain input but before any other variadic arguments.
5533     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5534     // saved or used.
5535     if (isTOCSaveRestoreRequired(Subtarget)) {
5536       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5537 
5538       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5539       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5540       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5541       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5542       Ops.push_back(AddTOC);
5543     }
5544 
5545     // Add the register used for the environment pointer.
5546     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5547       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5548                                     RegVT));
5549 
5550 
5551     // Add CTR register as callee so a bctr can be emitted later.
5552     if (CFlags.IsTailCall)
5553       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5554   }
5555 
5556   // If this is a tail call add stack pointer delta.
5557   if (CFlags.IsTailCall)
5558     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5559 
5560   // Add argument registers to the end of the list so that they are known live
5561   // into the call.
5562   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5563     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5564                                   RegsToPass[i].second.getValueType()));
5565 
5566   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5567   // no way to mark dependencies as implicit here.
5568   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5569   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5570        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5571     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5572 
5573   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5574   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5575     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5576 
5577   // Add a register mask operand representing the call-preserved registers.
5578   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5579   const uint32_t *Mask =
5580       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5581   assert(Mask && "Missing call preserved mask for calling convention");
5582   Ops.push_back(DAG.getRegisterMask(Mask));
5583 
5584   // If the glue is valid, it is the last operand.
5585   if (Glue.getNode())
5586     Ops.push_back(Glue);
5587 }
5588 
5589 SDValue PPCTargetLowering::FinishCall(
5590     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5591     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5592     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5593     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5594     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5595 
5596   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5597       Subtarget.isAIXABI())
5598     setUsesTOCBasePtr(DAG);
5599 
5600   unsigned CallOpc =
5601       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5602                     Subtarget, DAG.getTarget());
5603 
5604   if (!CFlags.IsIndirect)
5605     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5606   else if (Subtarget.usesFunctionDescriptors())
5607     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5608                                   dl, CFlags.HasNest, Subtarget);
5609   else
5610     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5611 
5612   // Build the operand list for the call instruction.
5613   SmallVector<SDValue, 8> Ops;
5614   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5615                     SPDiff, Subtarget);
5616 
5617   // Emit tail call.
5618   if (CFlags.IsTailCall) {
5619     // Indirect tail call when using PC Relative calls do not have the same
5620     // constraints.
5621     assert(((Callee.getOpcode() == ISD::Register &&
5622              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5623             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5624             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5625             isa<ConstantSDNode>(Callee) ||
5626             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5627            "Expecting a global address, external symbol, absolute value, "
5628            "register or an indirect tail call when PC Relative calls are "
5629            "used.");
5630     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5631     assert(CallOpc == PPCISD::TC_RETURN &&
5632            "Unexpected call opcode for a tail call.");
5633     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5634     return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5635   }
5636 
5637   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5638   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5639   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5640   Glue = Chain.getValue(1);
5641 
5642   // When performing tail call optimization the callee pops its arguments off
5643   // the stack. Account for this here so these bytes can be pushed back on in
5644   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5645   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5646                          getTargetMachine().Options.GuaranteedTailCallOpt)
5647                             ? NumBytes
5648                             : 0;
5649 
5650   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5651                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5652                              Glue, dl);
5653   Glue = Chain.getValue(1);
5654 
5655   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5656                          DAG, InVals);
5657 }
5658 
5659 SDValue
5660 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5661                              SmallVectorImpl<SDValue> &InVals) const {
5662   SelectionDAG &DAG                     = CLI.DAG;
5663   SDLoc &dl                             = CLI.DL;
5664   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5665   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5666   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5667   SDValue Chain                         = CLI.Chain;
5668   SDValue Callee                        = CLI.Callee;
5669   bool &isTailCall                      = CLI.IsTailCall;
5670   CallingConv::ID CallConv              = CLI.CallConv;
5671   bool isVarArg                         = CLI.IsVarArg;
5672   bool isPatchPoint                     = CLI.IsPatchPoint;
5673   const CallBase *CB                    = CLI.CB;
5674 
5675   if (isTailCall) {
5676     if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5677       isTailCall = false;
5678     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5679       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5680           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5681     else
5682       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5683                                                      Ins, DAG);
5684     if (isTailCall) {
5685       ++NumTailCalls;
5686       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5687         ++NumSiblingCalls;
5688 
5689       // PC Relative calls no longer guarantee that the callee is a Global
5690       // Address Node. The callee could be an indirect tail call in which
5691       // case the SDValue for the callee could be a load (to load the address
5692       // of a function pointer) or it may be a register copy (to move the
5693       // address of the callee from a function parameter into a virtual
5694       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5695       assert((Subtarget.isUsingPCRelativeCalls() ||
5696               isa<GlobalAddressSDNode>(Callee)) &&
5697              "Callee should be an llvm::Function object.");
5698 
5699       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5700                         << "\nTCO callee: ");
5701       LLVM_DEBUG(Callee.dump());
5702     }
5703   }
5704 
5705   if (!isTailCall && CB && CB->isMustTailCall())
5706     report_fatal_error("failed to perform tail call elimination on a call "
5707                        "site marked musttail");
5708 
5709   // When long calls (i.e. indirect calls) are always used, calls are always
5710   // made via function pointer. If we have a function name, first translate it
5711   // into a pointer.
5712   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5713       !isTailCall)
5714     Callee = LowerGlobalAddress(Callee, DAG);
5715 
5716   CallFlags CFlags(
5717       CallConv, isTailCall, isVarArg, isPatchPoint,
5718       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5719       // hasNest
5720       Subtarget.is64BitELFABI() &&
5721           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5722       CLI.NoMerge);
5723 
5724   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5725     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5726                             InVals, CB);
5727 
5728   if (Subtarget.isSVR4ABI())
5729     return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5730                             InVals, CB);
5731 
5732   if (Subtarget.isAIXABI())
5733     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5734                          InVals, CB);
5735 
5736   return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5737                           InVals, CB);
5738 }
5739 
5740 SDValue PPCTargetLowering::LowerCall_32SVR4(
5741     SDValue Chain, SDValue Callee, CallFlags CFlags,
5742     const SmallVectorImpl<ISD::OutputArg> &Outs,
5743     const SmallVectorImpl<SDValue> &OutVals,
5744     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5745     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5746     const CallBase *CB) const {
5747   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5748   // of the 32-bit SVR4 ABI stack frame layout.
5749 
5750   const CallingConv::ID CallConv = CFlags.CallConv;
5751   const bool IsVarArg = CFlags.IsVarArg;
5752   const bool IsTailCall = CFlags.IsTailCall;
5753 
5754   assert((CallConv == CallingConv::C ||
5755           CallConv == CallingConv::Cold ||
5756           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5757 
5758   const Align PtrAlign(4);
5759 
5760   MachineFunction &MF = DAG.getMachineFunction();
5761 
5762   // Mark this function as potentially containing a function that contains a
5763   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5764   // and restoring the callers stack pointer in this functions epilog. This is
5765   // done because by tail calling the called function might overwrite the value
5766   // in this function's (MF) stack pointer stack slot 0(SP).
5767   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5768       CallConv == CallingConv::Fast)
5769     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5770 
5771   // Count how many bytes are to be pushed on the stack, including the linkage
5772   // area, parameter list area and the part of the local variable space which
5773   // contains copies of aggregates which are passed by value.
5774 
5775   // Assign locations to all of the outgoing arguments.
5776   SmallVector<CCValAssign, 16> ArgLocs;
5777   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5778 
5779   // Reserve space for the linkage area on the stack.
5780   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5781                        PtrAlign);
5782   if (useSoftFloat())
5783     CCInfo.PreAnalyzeCallOperands(Outs);
5784 
5785   if (IsVarArg) {
5786     // Handle fixed and variable vector arguments differently.
5787     // Fixed vector arguments go into registers as long as registers are
5788     // available. Variable vector arguments always go into memory.
5789     unsigned NumArgs = Outs.size();
5790 
5791     for (unsigned i = 0; i != NumArgs; ++i) {
5792       MVT ArgVT = Outs[i].VT;
5793       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5794       bool Result;
5795 
5796       if (Outs[i].IsFixed) {
5797         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5798                                CCInfo);
5799       } else {
5800         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5801                                       ArgFlags, CCInfo);
5802       }
5803 
5804       if (Result) {
5805 #ifndef NDEBUG
5806         errs() << "Call operand #" << i << " has unhandled type "
5807              << EVT(ArgVT).getEVTString() << "\n";
5808 #endif
5809         llvm_unreachable(nullptr);
5810       }
5811     }
5812   } else {
5813     // All arguments are treated the same.
5814     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5815   }
5816   CCInfo.clearWasPPCF128();
5817 
5818   // Assign locations to all of the outgoing aggregate by value arguments.
5819   SmallVector<CCValAssign, 16> ByValArgLocs;
5820   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5821 
5822   // Reserve stack space for the allocations in CCInfo.
5823   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5824 
5825   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5826 
5827   // Size of the linkage area, parameter list area and the part of the local
5828   // space variable where copies of aggregates which are passed by value are
5829   // stored.
5830   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5831 
5832   // Calculate by how many bytes the stack has to be adjusted in case of tail
5833   // call optimization.
5834   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5835 
5836   // Adjust the stack pointer for the new arguments...
5837   // These operations are automatically eliminated by the prolog/epilog pass
5838   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5839   SDValue CallSeqStart = Chain;
5840 
5841   // Load the return address and frame pointer so it can be moved somewhere else
5842   // later.
5843   SDValue LROp, FPOp;
5844   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5845 
5846   // Set up a copy of the stack pointer for use loading and storing any
5847   // arguments that may not fit in the registers available for argument
5848   // passing.
5849   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5850 
5851   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5852   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5853   SmallVector<SDValue, 8> MemOpChains;
5854 
5855   bool seenFloatArg = false;
5856   // Walk the register/memloc assignments, inserting copies/loads.
5857   // i - Tracks the index into the list of registers allocated for the call
5858   // RealArgIdx - Tracks the index into the list of actual function arguments
5859   // j - Tracks the index into the list of byval arguments
5860   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5861        i != e;
5862        ++i, ++RealArgIdx) {
5863     CCValAssign &VA = ArgLocs[i];
5864     SDValue Arg = OutVals[RealArgIdx];
5865     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5866 
5867     if (Flags.isByVal()) {
5868       // Argument is an aggregate which is passed by value, thus we need to
5869       // create a copy of it in the local variable space of the current stack
5870       // frame (which is the stack frame of the caller) and pass the address of
5871       // this copy to the callee.
5872       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5873       CCValAssign &ByValVA = ByValArgLocs[j++];
5874       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5875 
5876       // Memory reserved in the local variable space of the callers stack frame.
5877       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5878 
5879       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5880       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5881                            StackPtr, PtrOff);
5882 
5883       // Create a copy of the argument in the local area of the current
5884       // stack frame.
5885       SDValue MemcpyCall =
5886         CreateCopyOfByValArgument(Arg, PtrOff,
5887                                   CallSeqStart.getNode()->getOperand(0),
5888                                   Flags, DAG, dl);
5889 
5890       // This must go outside the CALLSEQ_START..END.
5891       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5892                                                      SDLoc(MemcpyCall));
5893       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5894                              NewCallSeqStart.getNode());
5895       Chain = CallSeqStart = NewCallSeqStart;
5896 
5897       // Pass the address of the aggregate copy on the stack either in a
5898       // physical register or in the parameter list area of the current stack
5899       // frame to the callee.
5900       Arg = PtrOff;
5901     }
5902 
5903     // When useCRBits() is true, there can be i1 arguments.
5904     // It is because getRegisterType(MVT::i1) => MVT::i1,
5905     // and for other integer types getRegisterType() => MVT::i32.
5906     // Extend i1 and ensure callee will get i32.
5907     if (Arg.getValueType() == MVT::i1)
5908       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5909                         dl, MVT::i32, Arg);
5910 
5911     if (VA.isRegLoc()) {
5912       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5913       // Put argument in a physical register.
5914       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5915         bool IsLE = Subtarget.isLittleEndian();
5916         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5917                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5918         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5919         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5920                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5921         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5922                              SVal.getValue(0)));
5923       } else
5924         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5925     } else {
5926       // Put argument in the parameter list area of the current stack frame.
5927       assert(VA.isMemLoc());
5928       unsigned LocMemOffset = VA.getLocMemOffset();
5929 
5930       if (!IsTailCall) {
5931         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5932         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5933                              StackPtr, PtrOff);
5934 
5935         MemOpChains.push_back(
5936             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5937       } else {
5938         // Calculate and remember argument location.
5939         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5940                                  TailCallArguments);
5941       }
5942     }
5943   }
5944 
5945   if (!MemOpChains.empty())
5946     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5947 
5948   // Build a sequence of copy-to-reg nodes chained together with token chain
5949   // and flag operands which copy the outgoing args into the appropriate regs.
5950   SDValue InFlag;
5951   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5952     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5953                              RegsToPass[i].second, InFlag);
5954     InFlag = Chain.getValue(1);
5955   }
5956 
5957   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5958   // registers.
5959   if (IsVarArg) {
5960     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5961     SDValue Ops[] = { Chain, InFlag };
5962 
5963     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5964                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5965 
5966     InFlag = Chain.getValue(1);
5967   }
5968 
5969   if (IsTailCall)
5970     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5971                     TailCallArguments);
5972 
5973   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5974                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
5975 }
5976 
5977 // Copy an argument into memory, being careful to do this outside the
5978 // call sequence for the call to which the argument belongs.
5979 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5980     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5981     SelectionDAG &DAG, const SDLoc &dl) const {
5982   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5983                         CallSeqStart.getNode()->getOperand(0),
5984                         Flags, DAG, dl);
5985   // The MEMCPY must go outside the CALLSEQ_START..END.
5986   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5987   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5988                                                  SDLoc(MemcpyCall));
5989   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5990                          NewCallSeqStart.getNode());
5991   return NewCallSeqStart;
5992 }
5993 
5994 SDValue PPCTargetLowering::LowerCall_64SVR4(
5995     SDValue Chain, SDValue Callee, CallFlags CFlags,
5996     const SmallVectorImpl<ISD::OutputArg> &Outs,
5997     const SmallVectorImpl<SDValue> &OutVals,
5998     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5999     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6000     const CallBase *CB) const {
6001   bool isELFv2ABI = Subtarget.isELFv2ABI();
6002   bool isLittleEndian = Subtarget.isLittleEndian();
6003   unsigned NumOps = Outs.size();
6004   bool IsSibCall = false;
6005   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6006 
6007   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6008   unsigned PtrByteSize = 8;
6009 
6010   MachineFunction &MF = DAG.getMachineFunction();
6011 
6012   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6013     IsSibCall = true;
6014 
6015   // Mark this function as potentially containing a function that contains a
6016   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6017   // and restoring the callers stack pointer in this functions epilog. This is
6018   // done because by tail calling the called function might overwrite the value
6019   // in this function's (MF) stack pointer stack slot 0(SP).
6020   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6021     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6022 
6023   assert(!(IsFastCall && CFlags.IsVarArg) &&
6024          "fastcc not supported on varargs functions");
6025 
6026   // Count how many bytes are to be pushed on the stack, including the linkage
6027   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
6028   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6029   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6030   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6031   unsigned NumBytes = LinkageSize;
6032   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6033 
6034   static const MCPhysReg GPR[] = {
6035     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6036     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6037   };
6038   static const MCPhysReg VR[] = {
6039     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6040     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6041   };
6042 
6043   const unsigned NumGPRs = array_lengthof(GPR);
6044   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6045   const unsigned NumVRs  = array_lengthof(VR);
6046 
6047   // On ELFv2, we can avoid allocating the parameter area if all the arguments
6048   // can be passed to the callee in registers.
6049   // For the fast calling convention, there is another check below.
6050   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6051   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6052   if (!HasParameterArea) {
6053     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6054     unsigned AvailableFPRs = NumFPRs;
6055     unsigned AvailableVRs = NumVRs;
6056     unsigned NumBytesTmp = NumBytes;
6057     for (unsigned i = 0; i != NumOps; ++i) {
6058       if (Outs[i].Flags.isNest()) continue;
6059       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6060                                  PtrByteSize, LinkageSize, ParamAreaSize,
6061                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
6062         HasParameterArea = true;
6063     }
6064   }
6065 
6066   // When using the fast calling convention, we don't provide backing for
6067   // arguments that will be in registers.
6068   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6069 
6070   // Avoid allocating parameter area for fastcc functions if all the arguments
6071   // can be passed in the registers.
6072   if (IsFastCall)
6073     HasParameterArea = false;
6074 
6075   // Add up all the space actually used.
6076   for (unsigned i = 0; i != NumOps; ++i) {
6077     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6078     EVT ArgVT = Outs[i].VT;
6079     EVT OrigVT = Outs[i].ArgVT;
6080 
6081     if (Flags.isNest())
6082       continue;
6083 
6084     if (IsFastCall) {
6085       if (Flags.isByVal()) {
6086         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6087         if (NumGPRsUsed > NumGPRs)
6088           HasParameterArea = true;
6089       } else {
6090         switch (ArgVT.getSimpleVT().SimpleTy) {
6091         default: llvm_unreachable("Unexpected ValueType for argument!");
6092         case MVT::i1:
6093         case MVT::i32:
6094         case MVT::i64:
6095           if (++NumGPRsUsed <= NumGPRs)
6096             continue;
6097           break;
6098         case MVT::v4i32:
6099         case MVT::v8i16:
6100         case MVT::v16i8:
6101         case MVT::v2f64:
6102         case MVT::v2i64:
6103         case MVT::v1i128:
6104         case MVT::f128:
6105           if (++NumVRsUsed <= NumVRs)
6106             continue;
6107           break;
6108         case MVT::v4f32:
6109           if (++NumVRsUsed <= NumVRs)
6110             continue;
6111           break;
6112         case MVT::f32:
6113         case MVT::f64:
6114           if (++NumFPRsUsed <= NumFPRs)
6115             continue;
6116           break;
6117         }
6118         HasParameterArea = true;
6119       }
6120     }
6121 
6122     /* Respect alignment of argument on the stack.  */
6123     auto Alignement =
6124         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6125     NumBytes = alignTo(NumBytes, Alignement);
6126 
6127     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6128     if (Flags.isInConsecutiveRegsLast())
6129       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6130   }
6131 
6132   unsigned NumBytesActuallyUsed = NumBytes;
6133 
6134   // In the old ELFv1 ABI,
6135   // the prolog code of the callee may store up to 8 GPR argument registers to
6136   // the stack, allowing va_start to index over them in memory if its varargs.
6137   // Because we cannot tell if this is needed on the caller side, we have to
6138   // conservatively assume that it is needed.  As such, make sure we have at
6139   // least enough stack space for the caller to store the 8 GPRs.
6140   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6141   // really requires memory operands, e.g. a vararg function.
6142   if (HasParameterArea)
6143     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6144   else
6145     NumBytes = LinkageSize;
6146 
6147   // Tail call needs the stack to be aligned.
6148   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6149     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6150 
6151   int SPDiff = 0;
6152 
6153   // Calculate by how many bytes the stack has to be adjusted in case of tail
6154   // call optimization.
6155   if (!IsSibCall)
6156     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6157 
6158   // To protect arguments on the stack from being clobbered in a tail call,
6159   // force all the loads to happen before doing any other lowering.
6160   if (CFlags.IsTailCall)
6161     Chain = DAG.getStackArgumentTokenFactor(Chain);
6162 
6163   // Adjust the stack pointer for the new arguments...
6164   // These operations are automatically eliminated by the prolog/epilog pass
6165   if (!IsSibCall)
6166     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6167   SDValue CallSeqStart = Chain;
6168 
6169   // Load the return address and frame pointer so it can be move somewhere else
6170   // later.
6171   SDValue LROp, FPOp;
6172   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6173 
6174   // Set up a copy of the stack pointer for use loading and storing any
6175   // arguments that may not fit in the registers available for argument
6176   // passing.
6177   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6178 
6179   // Figure out which arguments are going to go in registers, and which in
6180   // memory.  Also, if this is a vararg function, floating point operations
6181   // must be stored to our stack, and loaded into integer regs as well, if
6182   // any integer regs are available for argument passing.
6183   unsigned ArgOffset = LinkageSize;
6184 
6185   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6186   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6187 
6188   SmallVector<SDValue, 8> MemOpChains;
6189   for (unsigned i = 0; i != NumOps; ++i) {
6190     SDValue Arg = OutVals[i];
6191     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6192     EVT ArgVT = Outs[i].VT;
6193     EVT OrigVT = Outs[i].ArgVT;
6194 
6195     // PtrOff will be used to store the current argument to the stack if a
6196     // register cannot be found for it.
6197     SDValue PtrOff;
6198 
6199     // We re-align the argument offset for each argument, except when using the
6200     // fast calling convention, when we need to make sure we do that only when
6201     // we'll actually use a stack slot.
6202     auto ComputePtrOff = [&]() {
6203       /* Respect alignment of argument on the stack.  */
6204       auto Alignment =
6205           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6206       ArgOffset = alignTo(ArgOffset, Alignment);
6207 
6208       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6209 
6210       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6211     };
6212 
6213     if (!IsFastCall) {
6214       ComputePtrOff();
6215 
6216       /* Compute GPR index associated with argument offset.  */
6217       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6218       GPR_idx = std::min(GPR_idx, NumGPRs);
6219     }
6220 
6221     // Promote integers to 64-bit values.
6222     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6223       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6224       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6225       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6226     }
6227 
6228     // FIXME memcpy is used way more than necessary.  Correctness first.
6229     // Note: "by value" is code for passing a structure by value, not
6230     // basic types.
6231     if (Flags.isByVal()) {
6232       // Note: Size includes alignment padding, so
6233       //   struct x { short a; char b; }
6234       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6235       // These are the proper values we need for right-justifying the
6236       // aggregate in a parameter register.
6237       unsigned Size = Flags.getByValSize();
6238 
6239       // An empty aggregate parameter takes up no storage and no
6240       // registers.
6241       if (Size == 0)
6242         continue;
6243 
6244       if (IsFastCall)
6245         ComputePtrOff();
6246 
6247       // All aggregates smaller than 8 bytes must be passed right-justified.
6248       if (Size==1 || Size==2 || Size==4) {
6249         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6250         if (GPR_idx != NumGPRs) {
6251           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6252                                         MachinePointerInfo(), VT);
6253           MemOpChains.push_back(Load.getValue(1));
6254           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6255 
6256           ArgOffset += PtrByteSize;
6257           continue;
6258         }
6259       }
6260 
6261       if (GPR_idx == NumGPRs && Size < 8) {
6262         SDValue AddPtr = PtrOff;
6263         if (!isLittleEndian) {
6264           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6265                                           PtrOff.getValueType());
6266           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6267         }
6268         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6269                                                           CallSeqStart,
6270                                                           Flags, DAG, dl);
6271         ArgOffset += PtrByteSize;
6272         continue;
6273       }
6274       // Copy entire object into memory.  There are cases where gcc-generated
6275       // code assumes it is there, even if it could be put entirely into
6276       // registers.  (This is not what the doc says.)
6277 
6278       // FIXME: The above statement is likely due to a misunderstanding of the
6279       // documents.  All arguments must be copied into the parameter area BY
6280       // THE CALLEE in the event that the callee takes the address of any
6281       // formal argument.  That has not yet been implemented.  However, it is
6282       // reasonable to use the stack area as a staging area for the register
6283       // load.
6284 
6285       // Skip this for small aggregates, as we will use the same slot for a
6286       // right-justified copy, below.
6287       if (Size >= 8)
6288         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6289                                                           CallSeqStart,
6290                                                           Flags, DAG, dl);
6291 
6292       // When a register is available, pass a small aggregate right-justified.
6293       if (Size < 8 && GPR_idx != NumGPRs) {
6294         // The easiest way to get this right-justified in a register
6295         // is to copy the structure into the rightmost portion of a
6296         // local variable slot, then load the whole slot into the
6297         // register.
6298         // FIXME: The memcpy seems to produce pretty awful code for
6299         // small aggregates, particularly for packed ones.
6300         // FIXME: It would be preferable to use the slot in the
6301         // parameter save area instead of a new local variable.
6302         SDValue AddPtr = PtrOff;
6303         if (!isLittleEndian) {
6304           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6305           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6306         }
6307         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6308                                                           CallSeqStart,
6309                                                           Flags, DAG, dl);
6310 
6311         // Load the slot into the register.
6312         SDValue Load =
6313             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6314         MemOpChains.push_back(Load.getValue(1));
6315         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6316 
6317         // Done with this argument.
6318         ArgOffset += PtrByteSize;
6319         continue;
6320       }
6321 
6322       // For aggregates larger than PtrByteSize, copy the pieces of the
6323       // object that fit into registers from the parameter save area.
6324       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6325         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6326         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6327         if (GPR_idx != NumGPRs) {
6328           SDValue Load =
6329               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6330           MemOpChains.push_back(Load.getValue(1));
6331           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6332           ArgOffset += PtrByteSize;
6333         } else {
6334           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6335           break;
6336         }
6337       }
6338       continue;
6339     }
6340 
6341     switch (Arg.getSimpleValueType().SimpleTy) {
6342     default: llvm_unreachable("Unexpected ValueType for argument!");
6343     case MVT::i1:
6344     case MVT::i32:
6345     case MVT::i64:
6346       if (Flags.isNest()) {
6347         // The 'nest' parameter, if any, is passed in R11.
6348         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6349         break;
6350       }
6351 
6352       // These can be scalar arguments or elements of an integer array type
6353       // passed directly.  Clang may use those instead of "byval" aggregate
6354       // types to avoid forcing arguments to memory unnecessarily.
6355       if (GPR_idx != NumGPRs) {
6356         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6357       } else {
6358         if (IsFastCall)
6359           ComputePtrOff();
6360 
6361         assert(HasParameterArea &&
6362                "Parameter area must exist to pass an argument in memory.");
6363         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6364                          true, CFlags.IsTailCall, false, MemOpChains,
6365                          TailCallArguments, dl);
6366         if (IsFastCall)
6367           ArgOffset += PtrByteSize;
6368       }
6369       if (!IsFastCall)
6370         ArgOffset += PtrByteSize;
6371       break;
6372     case MVT::f32:
6373     case MVT::f64: {
6374       // These can be scalar arguments or elements of a float array type
6375       // passed directly.  The latter are used to implement ELFv2 homogenous
6376       // float aggregates.
6377 
6378       // Named arguments go into FPRs first, and once they overflow, the
6379       // remaining arguments go into GPRs and then the parameter save area.
6380       // Unnamed arguments for vararg functions always go to GPRs and
6381       // then the parameter save area.  For now, put all arguments to vararg
6382       // routines always in both locations (FPR *and* GPR or stack slot).
6383       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6384       bool NeededLoad = false;
6385 
6386       // First load the argument into the next available FPR.
6387       if (FPR_idx != NumFPRs)
6388         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6389 
6390       // Next, load the argument into GPR or stack slot if needed.
6391       if (!NeedGPROrStack)
6392         ;
6393       else if (GPR_idx != NumGPRs && !IsFastCall) {
6394         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6395         // once we support fp <-> gpr moves.
6396 
6397         // In the non-vararg case, this can only ever happen in the
6398         // presence of f32 array types, since otherwise we never run
6399         // out of FPRs before running out of GPRs.
6400         SDValue ArgVal;
6401 
6402         // Double values are always passed in a single GPR.
6403         if (Arg.getValueType() != MVT::f32) {
6404           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6405 
6406         // Non-array float values are extended and passed in a GPR.
6407         } else if (!Flags.isInConsecutiveRegs()) {
6408           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6409           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6410 
6411         // If we have an array of floats, we collect every odd element
6412         // together with its predecessor into one GPR.
6413         } else if (ArgOffset % PtrByteSize != 0) {
6414           SDValue Lo, Hi;
6415           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6416           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6417           if (!isLittleEndian)
6418             std::swap(Lo, Hi);
6419           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6420 
6421         // The final element, if even, goes into the first half of a GPR.
6422         } else if (Flags.isInConsecutiveRegsLast()) {
6423           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6424           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6425           if (!isLittleEndian)
6426             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6427                                  DAG.getConstant(32, dl, MVT::i32));
6428 
6429         // Non-final even elements are skipped; they will be handled
6430         // together the with subsequent argument on the next go-around.
6431         } else
6432           ArgVal = SDValue();
6433 
6434         if (ArgVal.getNode())
6435           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6436       } else {
6437         if (IsFastCall)
6438           ComputePtrOff();
6439 
6440         // Single-precision floating-point values are mapped to the
6441         // second (rightmost) word of the stack doubleword.
6442         if (Arg.getValueType() == MVT::f32 &&
6443             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6444           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6445           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6446         }
6447 
6448         assert(HasParameterArea &&
6449                "Parameter area must exist to pass an argument in memory.");
6450         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6451                          true, CFlags.IsTailCall, false, MemOpChains,
6452                          TailCallArguments, dl);
6453 
6454         NeededLoad = true;
6455       }
6456       // When passing an array of floats, the array occupies consecutive
6457       // space in the argument area; only round up to the next doubleword
6458       // at the end of the array.  Otherwise, each float takes 8 bytes.
6459       if (!IsFastCall || NeededLoad) {
6460         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6461                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6462         if (Flags.isInConsecutiveRegsLast())
6463           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6464       }
6465       break;
6466     }
6467     case MVT::v4f32:
6468     case MVT::v4i32:
6469     case MVT::v8i16:
6470     case MVT::v16i8:
6471     case MVT::v2f64:
6472     case MVT::v2i64:
6473     case MVT::v1i128:
6474     case MVT::f128:
6475       // These can be scalar arguments or elements of a vector array type
6476       // passed directly.  The latter are used to implement ELFv2 homogenous
6477       // vector aggregates.
6478 
6479       // For a varargs call, named arguments go into VRs or on the stack as
6480       // usual; unnamed arguments always go to the stack or the corresponding
6481       // GPRs when within range.  For now, we always put the value in both
6482       // locations (or even all three).
6483       if (CFlags.IsVarArg) {
6484         assert(HasParameterArea &&
6485                "Parameter area must exist if we have a varargs call.");
6486         // We could elide this store in the case where the object fits
6487         // entirely in R registers.  Maybe later.
6488         SDValue Store =
6489             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6490         MemOpChains.push_back(Store);
6491         if (VR_idx != NumVRs) {
6492           SDValue Load =
6493               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6494           MemOpChains.push_back(Load.getValue(1));
6495           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6496         }
6497         ArgOffset += 16;
6498         for (unsigned i=0; i<16; i+=PtrByteSize) {
6499           if (GPR_idx == NumGPRs)
6500             break;
6501           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6502                                    DAG.getConstant(i, dl, PtrVT));
6503           SDValue Load =
6504               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6505           MemOpChains.push_back(Load.getValue(1));
6506           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6507         }
6508         break;
6509       }
6510 
6511       // Non-varargs Altivec params go into VRs or on the stack.
6512       if (VR_idx != NumVRs) {
6513         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6514       } else {
6515         if (IsFastCall)
6516           ComputePtrOff();
6517 
6518         assert(HasParameterArea &&
6519                "Parameter area must exist to pass an argument in memory.");
6520         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6521                          true, CFlags.IsTailCall, true, MemOpChains,
6522                          TailCallArguments, dl);
6523         if (IsFastCall)
6524           ArgOffset += 16;
6525       }
6526 
6527       if (!IsFastCall)
6528         ArgOffset += 16;
6529       break;
6530     }
6531   }
6532 
6533   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6534          "mismatch in size of parameter area");
6535   (void)NumBytesActuallyUsed;
6536 
6537   if (!MemOpChains.empty())
6538     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6539 
6540   // Check if this is an indirect call (MTCTR/BCTRL).
6541   // See prepareDescriptorIndirectCall and buildCallOperands for more
6542   // information about calls through function pointers in the 64-bit SVR4 ABI.
6543   if (CFlags.IsIndirect) {
6544     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6545     // caller in the TOC save area.
6546     if (isTOCSaveRestoreRequired(Subtarget)) {
6547       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6548       // Load r2 into a virtual register and store it to the TOC save area.
6549       setUsesTOCBasePtr(DAG);
6550       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6551       // TOC save area offset.
6552       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6553       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6554       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6555       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6556                            MachinePointerInfo::getStack(
6557                                DAG.getMachineFunction(), TOCSaveOffset));
6558     }
6559     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6560     // This does not mean the MTCTR instruction must use R12; it's easier
6561     // to model this as an extra parameter, so do that.
6562     if (isELFv2ABI && !CFlags.IsPatchPoint)
6563       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6564   }
6565 
6566   // Build a sequence of copy-to-reg nodes chained together with token chain
6567   // and flag operands which copy the outgoing args into the appropriate regs.
6568   SDValue InFlag;
6569   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6570     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6571                              RegsToPass[i].second, InFlag);
6572     InFlag = Chain.getValue(1);
6573   }
6574 
6575   if (CFlags.IsTailCall && !IsSibCall)
6576     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6577                     TailCallArguments);
6578 
6579   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6580                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6581 }
6582 
6583 SDValue PPCTargetLowering::LowerCall_Darwin(
6584     SDValue Chain, SDValue Callee, CallFlags CFlags,
6585     const SmallVectorImpl<ISD::OutputArg> &Outs,
6586     const SmallVectorImpl<SDValue> &OutVals,
6587     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6588     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6589     const CallBase *CB) const {
6590   unsigned NumOps = Outs.size();
6591 
6592   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6593   bool isPPC64 = PtrVT == MVT::i64;
6594   unsigned PtrByteSize = isPPC64 ? 8 : 4;
6595 
6596   MachineFunction &MF = DAG.getMachineFunction();
6597 
6598   // Mark this function as potentially containing a function that contains a
6599   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6600   // and restoring the callers stack pointer in this functions epilog. This is
6601   // done because by tail calling the called function might overwrite the value
6602   // in this function's (MF) stack pointer stack slot 0(SP).
6603   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6604       CFlags.CallConv == CallingConv::Fast)
6605     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6606 
6607   // Count how many bytes are to be pushed on the stack, including the linkage
6608   // area, and parameter passing area.  We start with 24/48 bytes, which is
6609   // prereserved space for [SP][CR][LR][3 x unused].
6610   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6611   unsigned NumBytes = LinkageSize;
6612 
6613   // Add up all the space actually used.
6614   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6615   // they all go in registers, but we must reserve stack space for them for
6616   // possible use by the caller.  In varargs or 64-bit calls, parameters are
6617   // assigned stack space in order, with padding so Altivec parameters are
6618   // 16-byte aligned.
6619   unsigned nAltivecParamsAtEnd = 0;
6620   for (unsigned i = 0; i != NumOps; ++i) {
6621     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6622     EVT ArgVT = Outs[i].VT;
6623     // Varargs Altivec parameters are padded to a 16 byte boundary.
6624     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6625         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6626         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6627       if (!CFlags.IsVarArg && !isPPC64) {
6628         // Non-varargs Altivec parameters go after all the non-Altivec
6629         // parameters; handle those later so we know how much padding we need.
6630         nAltivecParamsAtEnd++;
6631         continue;
6632       }
6633       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6634       NumBytes = ((NumBytes+15)/16)*16;
6635     }
6636     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6637   }
6638 
6639   // Allow for Altivec parameters at the end, if needed.
6640   if (nAltivecParamsAtEnd) {
6641     NumBytes = ((NumBytes+15)/16)*16;
6642     NumBytes += 16*nAltivecParamsAtEnd;
6643   }
6644 
6645   // The prolog code of the callee may store up to 8 GPR argument registers to
6646   // the stack, allowing va_start to index over them in memory if its varargs.
6647   // Because we cannot tell if this is needed on the caller side, we have to
6648   // conservatively assume that it is needed.  As such, make sure we have at
6649   // least enough stack space for the caller to store the 8 GPRs.
6650   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6651 
6652   // Tail call needs the stack to be aligned.
6653   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6654       CFlags.CallConv == CallingConv::Fast)
6655     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6656 
6657   // Calculate by how many bytes the stack has to be adjusted in case of tail
6658   // call optimization.
6659   int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6660 
6661   // To protect arguments on the stack from being clobbered in a tail call,
6662   // force all the loads to happen before doing any other lowering.
6663   if (CFlags.IsTailCall)
6664     Chain = DAG.getStackArgumentTokenFactor(Chain);
6665 
6666   // Adjust the stack pointer for the new arguments...
6667   // These operations are automatically eliminated by the prolog/epilog pass
6668   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6669   SDValue CallSeqStart = Chain;
6670 
6671   // Load the return address and frame pointer so it can be move somewhere else
6672   // later.
6673   SDValue LROp, FPOp;
6674   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6675 
6676   // Set up a copy of the stack pointer for use loading and storing any
6677   // arguments that may not fit in the registers available for argument
6678   // passing.
6679   SDValue StackPtr;
6680   if (isPPC64)
6681     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6682   else
6683     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6684 
6685   // Figure out which arguments are going to go in registers, and which in
6686   // memory.  Also, if this is a vararg function, floating point operations
6687   // must be stored to our stack, and loaded into integer regs as well, if
6688   // any integer regs are available for argument passing.
6689   unsigned ArgOffset = LinkageSize;
6690   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6691 
6692   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6693     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6694     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6695   };
6696   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6697     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6698     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6699   };
6700   static const MCPhysReg VR[] = {
6701     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6702     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6703   };
6704   const unsigned NumGPRs = array_lengthof(GPR_32);
6705   const unsigned NumFPRs = 13;
6706   const unsigned NumVRs  = array_lengthof(VR);
6707 
6708   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6709 
6710   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6711   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6712 
6713   SmallVector<SDValue, 8> MemOpChains;
6714   for (unsigned i = 0; i != NumOps; ++i) {
6715     SDValue Arg = OutVals[i];
6716     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6717 
6718     // PtrOff will be used to store the current argument to the stack if a
6719     // register cannot be found for it.
6720     SDValue PtrOff;
6721 
6722     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6723 
6724     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6725 
6726     // On PPC64, promote integers to 64-bit values.
6727     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6728       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6729       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6730       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6731     }
6732 
6733     // FIXME memcpy is used way more than necessary.  Correctness first.
6734     // Note: "by value" is code for passing a structure by value, not
6735     // basic types.
6736     if (Flags.isByVal()) {
6737       unsigned Size = Flags.getByValSize();
6738       // Very small objects are passed right-justified.  Everything else is
6739       // passed left-justified.
6740       if (Size==1 || Size==2) {
6741         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6742         if (GPR_idx != NumGPRs) {
6743           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6744                                         MachinePointerInfo(), VT);
6745           MemOpChains.push_back(Load.getValue(1));
6746           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6747 
6748           ArgOffset += PtrByteSize;
6749         } else {
6750           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6751                                           PtrOff.getValueType());
6752           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6753           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6754                                                             CallSeqStart,
6755                                                             Flags, DAG, dl);
6756           ArgOffset += PtrByteSize;
6757         }
6758         continue;
6759       }
6760       // Copy entire object into memory.  There are cases where gcc-generated
6761       // code assumes it is there, even if it could be put entirely into
6762       // registers.  (This is not what the doc says.)
6763       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6764                                                         CallSeqStart,
6765                                                         Flags, DAG, dl);
6766 
6767       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6768       // copy the pieces of the object that fit into registers from the
6769       // parameter save area.
6770       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6771         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6772         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6773         if (GPR_idx != NumGPRs) {
6774           SDValue Load =
6775               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6776           MemOpChains.push_back(Load.getValue(1));
6777           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6778           ArgOffset += PtrByteSize;
6779         } else {
6780           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6781           break;
6782         }
6783       }
6784       continue;
6785     }
6786 
6787     switch (Arg.getSimpleValueType().SimpleTy) {
6788     default: llvm_unreachable("Unexpected ValueType for argument!");
6789     case MVT::i1:
6790     case MVT::i32:
6791     case MVT::i64:
6792       if (GPR_idx != NumGPRs) {
6793         if (Arg.getValueType() == MVT::i1)
6794           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6795 
6796         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6797       } else {
6798         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6799                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6800                          TailCallArguments, dl);
6801       }
6802       ArgOffset += PtrByteSize;
6803       break;
6804     case MVT::f32:
6805     case MVT::f64:
6806       if (FPR_idx != NumFPRs) {
6807         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6808 
6809         if (CFlags.IsVarArg) {
6810           SDValue Store =
6811               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6812           MemOpChains.push_back(Store);
6813 
6814           // Float varargs are always shadowed in available integer registers
6815           if (GPR_idx != NumGPRs) {
6816             SDValue Load =
6817                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6818             MemOpChains.push_back(Load.getValue(1));
6819             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6820           }
6821           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6822             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6823             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6824             SDValue Load =
6825                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6826             MemOpChains.push_back(Load.getValue(1));
6827             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6828           }
6829         } else {
6830           // If we have any FPRs remaining, we may also have GPRs remaining.
6831           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6832           // GPRs.
6833           if (GPR_idx != NumGPRs)
6834             ++GPR_idx;
6835           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6836               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6837             ++GPR_idx;
6838         }
6839       } else
6840         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6841                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6842                          TailCallArguments, dl);
6843       if (isPPC64)
6844         ArgOffset += 8;
6845       else
6846         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6847       break;
6848     case MVT::v4f32:
6849     case MVT::v4i32:
6850     case MVT::v8i16:
6851     case MVT::v16i8:
6852       if (CFlags.IsVarArg) {
6853         // These go aligned on the stack, or in the corresponding R registers
6854         // when within range.  The Darwin PPC ABI doc claims they also go in
6855         // V registers; in fact gcc does this only for arguments that are
6856         // prototyped, not for those that match the ...  We do it for all
6857         // arguments, seems to work.
6858         while (ArgOffset % 16 !=0) {
6859           ArgOffset += PtrByteSize;
6860           if (GPR_idx != NumGPRs)
6861             GPR_idx++;
6862         }
6863         // We could elide this store in the case where the object fits
6864         // entirely in R registers.  Maybe later.
6865         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6866                              DAG.getConstant(ArgOffset, dl, PtrVT));
6867         SDValue Store =
6868             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6869         MemOpChains.push_back(Store);
6870         if (VR_idx != NumVRs) {
6871           SDValue Load =
6872               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6873           MemOpChains.push_back(Load.getValue(1));
6874           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6875         }
6876         ArgOffset += 16;
6877         for (unsigned i=0; i<16; i+=PtrByteSize) {
6878           if (GPR_idx == NumGPRs)
6879             break;
6880           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6881                                    DAG.getConstant(i, dl, PtrVT));
6882           SDValue Load =
6883               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6884           MemOpChains.push_back(Load.getValue(1));
6885           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6886         }
6887         break;
6888       }
6889 
6890       // Non-varargs Altivec params generally go in registers, but have
6891       // stack space allocated at the end.
6892       if (VR_idx != NumVRs) {
6893         // Doesn't have GPR space allocated.
6894         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6895       } else if (nAltivecParamsAtEnd==0) {
6896         // We are emitting Altivec params in order.
6897         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6898                          isPPC64, CFlags.IsTailCall, true, MemOpChains,
6899                          TailCallArguments, dl);
6900         ArgOffset += 16;
6901       }
6902       break;
6903     }
6904   }
6905   // If all Altivec parameters fit in registers, as they usually do,
6906   // they get stack space following the non-Altivec parameters.  We
6907   // don't track this here because nobody below needs it.
6908   // If there are more Altivec parameters than fit in registers emit
6909   // the stores here.
6910   if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6911     unsigned j = 0;
6912     // Offset is aligned; skip 1st 12 params which go in V registers.
6913     ArgOffset = ((ArgOffset+15)/16)*16;
6914     ArgOffset += 12*16;
6915     for (unsigned i = 0; i != NumOps; ++i) {
6916       SDValue Arg = OutVals[i];
6917       EVT ArgType = Outs[i].VT;
6918       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6919           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6920         if (++j > NumVRs) {
6921           SDValue PtrOff;
6922           // We are emitting Altivec params in order.
6923           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6924                            isPPC64, CFlags.IsTailCall, true, MemOpChains,
6925                            TailCallArguments, dl);
6926           ArgOffset += 16;
6927         }
6928       }
6929     }
6930   }
6931 
6932   if (!MemOpChains.empty())
6933     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6934 
6935   // On Darwin, R12 must contain the address of an indirect callee.  This does
6936   // not mean the MTCTR instruction must use R12; it's easier to model this as
6937   // an extra parameter, so do that.
6938   if (CFlags.IsIndirect) {
6939     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6940     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6941                                                    PPC::R12), Callee));
6942   }
6943 
6944   // Build a sequence of copy-to-reg nodes chained together with token chain
6945   // and flag operands which copy the outgoing args into the appropriate regs.
6946   SDValue InFlag;
6947   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6948     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6949                              RegsToPass[i].second, InFlag);
6950     InFlag = Chain.getValue(1);
6951   }
6952 
6953   if (CFlags.IsTailCall)
6954     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6955                     TailCallArguments);
6956 
6957   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6958                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6959 }
6960 
6961 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6962                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6963                    CCState &State) {
6964 
6965   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6966       State.getMachineFunction().getSubtarget());
6967   const bool IsPPC64 = Subtarget.isPPC64();
6968   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6969   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6970 
6971   assert((!ValVT.isInteger() ||
6972           (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
6973          "Integer argument exceeds register size: should have been legalized");
6974 
6975   if (ValVT == MVT::f128)
6976     report_fatal_error("f128 is unimplemented on AIX.");
6977 
6978   if (ArgFlags.isNest())
6979     report_fatal_error("Nest arguments are unimplemented.");
6980 
6981   if (ValVT.isVector() || LocVT.isVector())
6982     report_fatal_error("Vector arguments are unimplemented on AIX.");
6983 
6984   static const MCPhysReg GPR_32[] = {// 32-bit registers.
6985                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6986                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6987   static const MCPhysReg GPR_64[] = {// 64-bit registers.
6988                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6989                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6990 
6991   if (ArgFlags.isByVal()) {
6992     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6993       report_fatal_error("Pass-by-value arguments with alignment greater than "
6994                          "register width are not supported.");
6995 
6996     const unsigned ByValSize = ArgFlags.getByValSize();
6997 
6998     // An empty aggregate parameter takes up no storage and no registers,
6999     // but needs a MemLoc for a stack slot for the formal arguments side.
7000     if (ByValSize == 0) {
7001       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7002                                        State.getNextStackOffset(), RegVT,
7003                                        LocInfo));
7004       return false;
7005     }
7006 
7007     const unsigned StackSize = alignTo(ByValSize, PtrAlign);
7008     unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
7009     for (const unsigned E = Offset + StackSize; Offset < E;
7010          Offset += PtrAlign.value()) {
7011       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7012         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7013       else {
7014         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7015                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
7016                                          LocInfo));
7017         break;
7018       }
7019     }
7020     return false;
7021   }
7022 
7023   // Arguments always reserve parameter save area.
7024   switch (ValVT.SimpleTy) {
7025   default:
7026     report_fatal_error("Unhandled value type for argument.");
7027   case MVT::i64:
7028     // i64 arguments should have been split to i32 for PPC32.
7029     assert(IsPPC64 && "PPC32 should have split i64 values.");
7030     LLVM_FALLTHROUGH;
7031   case MVT::i1:
7032   case MVT::i32: {
7033     const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
7034     // AIX integer arguments are always passed in register width.
7035     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
7036       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
7037                                   : CCValAssign::LocInfo::ZExt;
7038     if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7039       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7040     else
7041       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
7042 
7043     return false;
7044   }
7045   case MVT::f32:
7046   case MVT::f64: {
7047     // Parameter save area (PSA) is reserved even if the float passes in fpr.
7048     const unsigned StoreSize = LocVT.getStoreSize();
7049     // Floats are always 4-byte aligned in the PSA on AIX.
7050     // This includes f64 in 64-bit mode for ABI compatibility.
7051     const unsigned Offset =
7052         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7053     unsigned FReg = State.AllocateReg(FPR);
7054     if (FReg)
7055       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7056 
7057     // Reserve and initialize GPRs or initialize the PSA as required.
7058     for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
7059       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
7060         assert(FReg && "An FPR should be available when a GPR is reserved.");
7061         if (State.isVarArg()) {
7062           // Successfully reserved GPRs are only initialized for vararg calls.
7063           // Custom handling is required for:
7064           //   f64 in PPC32 needs to be split into 2 GPRs.
7065           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7066           State.addLoc(
7067               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7068         }
7069       } else {
7070         // If there are insufficient GPRs, the PSA needs to be initialized.
7071         // Initialization occurs even if an FPR was initialized for
7072         // compatibility with the AIX XL compiler. The full memory for the
7073         // argument will be initialized even if a prior word is saved in GPR.
7074         // A custom memLoc is used when the argument also passes in FPR so
7075         // that the callee handling can skip over it easily.
7076         State.addLoc(
7077             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7078                                              LocInfo)
7079                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7080         break;
7081       }
7082     }
7083 
7084     return false;
7085   }
7086   }
7087   return true;
7088 }
7089 
7090 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7091                                                     bool IsPPC64) {
7092   assert((IsPPC64 || SVT != MVT::i64) &&
7093          "i64 should have been split for 32-bit codegen.");
7094 
7095   switch (SVT) {
7096   default:
7097     report_fatal_error("Unexpected value type for formal argument");
7098   case MVT::i1:
7099   case MVT::i32:
7100   case MVT::i64:
7101     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7102   case MVT::f32:
7103     return &PPC::F4RCRegClass;
7104   case MVT::f64:
7105     return &PPC::F8RCRegClass;
7106   }
7107 }
7108 
7109 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7110                                         SelectionDAG &DAG, SDValue ArgValue,
7111                                         MVT LocVT, const SDLoc &dl) {
7112   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7113   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7114 
7115   if (Flags.isSExt())
7116     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7117                            DAG.getValueType(ValVT));
7118   else if (Flags.isZExt())
7119     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7120                            DAG.getValueType(ValVT));
7121 
7122   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7123 }
7124 
7125 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7126   const unsigned LASize = FL->getLinkageSize();
7127 
7128   if (PPC::GPRCRegClass.contains(Reg)) {
7129     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7130            "Reg must be a valid argument register!");
7131     return LASize + 4 * (Reg - PPC::R3);
7132   }
7133 
7134   if (PPC::G8RCRegClass.contains(Reg)) {
7135     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7136            "Reg must be a valid argument register!");
7137     return LASize + 8 * (Reg - PPC::X3);
7138   }
7139 
7140   llvm_unreachable("Only general purpose registers expected.");
7141 }
7142 
7143 //   AIX ABI Stack Frame Layout:
7144 //
7145 //   Low Memory +--------------------------------------------+
7146 //   SP   +---> | Back chain                                 | ---+
7147 //        |     +--------------------------------------------+    |
7148 //        |     | Saved Condition Register                   |    |
7149 //        |     +--------------------------------------------+    |
7150 //        |     | Saved Linkage Register                     |    |
7151 //        |     +--------------------------------------------+    | Linkage Area
7152 //        |     | Reserved for compilers                     |    |
7153 //        |     +--------------------------------------------+    |
7154 //        |     | Reserved for binders                       |    |
7155 //        |     +--------------------------------------------+    |
7156 //        |     | Saved TOC pointer                          | ---+
7157 //        |     +--------------------------------------------+
7158 //        |     | Parameter save area                        |
7159 //        |     +--------------------------------------------+
7160 //        |     | Alloca space                               |
7161 //        |     +--------------------------------------------+
7162 //        |     | Local variable space                       |
7163 //        |     +--------------------------------------------+
7164 //        |     | Float/int conversion temporary             |
7165 //        |     +--------------------------------------------+
7166 //        |     | Save area for AltiVec registers            |
7167 //        |     +--------------------------------------------+
7168 //        |     | AltiVec alignment padding                  |
7169 //        |     +--------------------------------------------+
7170 //        |     | Save area for VRSAVE register              |
7171 //        |     +--------------------------------------------+
7172 //        |     | Save area for General Purpose registers    |
7173 //        |     +--------------------------------------------+
7174 //        |     | Save area for Floating Point registers     |
7175 //        |     +--------------------------------------------+
7176 //        +---- | Back chain                                 |
7177 // High Memory  +--------------------------------------------+
7178 //
7179 //  Specifications:
7180 //  AIX 7.2 Assembler Language Reference
7181 //  Subroutine linkage convention
7182 
7183 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7184     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7185     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7186     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7187 
7188   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7189           CallConv == CallingConv::Fast) &&
7190          "Unexpected calling convention!");
7191 
7192   if (getTargetMachine().Options.GuaranteedTailCallOpt)
7193     report_fatal_error("Tail call support is unimplemented on AIX.");
7194 
7195   if (useSoftFloat())
7196     report_fatal_error("Soft float support is unimplemented on AIX.");
7197 
7198   const PPCSubtarget &Subtarget =
7199       static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7200 
7201   const bool IsPPC64 = Subtarget.isPPC64();
7202   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7203 
7204   // Assign locations to all of the incoming arguments.
7205   SmallVector<CCValAssign, 16> ArgLocs;
7206   MachineFunction &MF = DAG.getMachineFunction();
7207   MachineFrameInfo &MFI = MF.getFrameInfo();
7208   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7209 
7210   const EVT PtrVT = getPointerTy(MF.getDataLayout());
7211   // Reserve space for the linkage area on the stack.
7212   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7213   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7214   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7215 
7216   SmallVector<SDValue, 8> MemOps;
7217 
7218   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7219     CCValAssign &VA = ArgLocs[I++];
7220     MVT LocVT = VA.getLocVT();
7221     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7222 
7223     // For compatibility with the AIX XL compiler, the float args in the
7224     // parameter save area are initialized even if the argument is available
7225     // in register.  The caller is required to initialize both the register
7226     // and memory, however, the callee can choose to expect it in either.
7227     // The memloc is dismissed here because the argument is retrieved from
7228     // the register.
7229     if (VA.isMemLoc() && VA.needsCustom())
7230       continue;
7231 
7232     if (Flags.isByVal() && VA.isMemLoc()) {
7233       const unsigned Size =
7234           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7235                   PtrByteSize);
7236       const int FI = MF.getFrameInfo().CreateFixedObject(
7237           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7238           /* IsAliased */ true);
7239       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7240       InVals.push_back(FIN);
7241 
7242       continue;
7243     }
7244 
7245     if (Flags.isByVal()) {
7246       assert(VA.isRegLoc() && "MemLocs should already be handled.");
7247 
7248       const MCPhysReg ArgReg = VA.getLocReg();
7249       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7250 
7251       if (Flags.getNonZeroByValAlign() > PtrByteSize)
7252         report_fatal_error("Over aligned byvals not supported yet.");
7253 
7254       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7255       const int FI = MF.getFrameInfo().CreateFixedObject(
7256           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7257           /* IsAliased */ true);
7258       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7259       InVals.push_back(FIN);
7260 
7261       // Add live ins for all the RegLocs for the same ByVal.
7262       const TargetRegisterClass *RegClass =
7263           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7264 
7265       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7266                                                unsigned Offset) {
7267         const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7268         // Since the callers side has left justified the aggregate in the
7269         // register, we can simply store the entire register into the stack
7270         // slot.
7271         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7272         // The store to the fixedstack object is needed becuase accessing a
7273         // field of the ByVal will use a gep and load. Ideally we will optimize
7274         // to extracting the value from the register directly, and elide the
7275         // stores when the arguments address is not taken, but that will need to
7276         // be future work.
7277         SDValue Store = DAG.getStore(
7278             CopyFrom.getValue(1), dl, CopyFrom,
7279             DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7280             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7281 
7282         MemOps.push_back(Store);
7283       };
7284 
7285       unsigned Offset = 0;
7286       HandleRegLoc(VA.getLocReg(), Offset);
7287       Offset += PtrByteSize;
7288       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7289            Offset += PtrByteSize) {
7290         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7291                "RegLocs should be for ByVal argument.");
7292 
7293         const CCValAssign RL = ArgLocs[I++];
7294         HandleRegLoc(RL.getLocReg(), Offset);
7295       }
7296 
7297       if (Offset != StackSize) {
7298         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7299                "Expected MemLoc for remaining bytes.");
7300         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7301         // Consume the MemLoc.The InVal has already been emitted, so nothing
7302         // more needs to be done.
7303         ++I;
7304       }
7305 
7306       continue;
7307     }
7308 
7309     EVT ValVT = VA.getValVT();
7310     if (VA.isRegLoc() && !VA.needsCustom()) {
7311       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7312       unsigned VReg =
7313           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7314       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7315       if (ValVT.isScalarInteger() &&
7316           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7317         ArgValue =
7318             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7319       }
7320       InVals.push_back(ArgValue);
7321       continue;
7322     }
7323     if (VA.isMemLoc()) {
7324       const unsigned LocSize = LocVT.getStoreSize();
7325       const unsigned ValSize = ValVT.getStoreSize();
7326       assert((ValSize <= LocSize) &&
7327              "Object size is larger than size of MemLoc");
7328       int CurArgOffset = VA.getLocMemOffset();
7329       // Objects are right-justified because AIX is big-endian.
7330       if (LocSize > ValSize)
7331         CurArgOffset += LocSize - ValSize;
7332       // Potential tail calls could cause overwriting of argument stack slots.
7333       const bool IsImmutable =
7334           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7335             (CallConv == CallingConv::Fast));
7336       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7337       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7338       SDValue ArgValue =
7339           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7340       InVals.push_back(ArgValue);
7341       continue;
7342     }
7343   }
7344 
7345   // On AIX a minimum of 8 words is saved to the parameter save area.
7346   const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7347   // Area that is at least reserved in the caller of this function.
7348   unsigned CallerReservedArea =
7349       std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7350 
7351   // Set the size that is at least reserved in caller of this function. Tail
7352   // call optimized function's reserved stack space needs to be aligned so
7353   // that taking the difference between two stack areas will result in an
7354   // aligned stack.
7355   CallerReservedArea =
7356       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7357   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7358   FuncInfo->setMinReservedArea(CallerReservedArea);
7359 
7360   if (isVarArg) {
7361     FuncInfo->setVarArgsFrameIndex(
7362         MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7363     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7364 
7365     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7366                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7367 
7368     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7369                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7370     const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7371 
7372     // The fixed integer arguments of a variadic function are stored to the
7373     // VarArgsFrameIndex on the stack so that they may be loaded by
7374     // dereferencing the result of va_next.
7375     for (unsigned GPRIndex =
7376              (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7377          GPRIndex < NumGPArgRegs; ++GPRIndex) {
7378 
7379       const unsigned VReg =
7380           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7381                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7382 
7383       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7384       SDValue Store =
7385           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7386       MemOps.push_back(Store);
7387       // Increment the address for the next argument to store.
7388       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7389       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7390     }
7391   }
7392 
7393   if (!MemOps.empty())
7394     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7395 
7396   return Chain;
7397 }
7398 
7399 SDValue PPCTargetLowering::LowerCall_AIX(
7400     SDValue Chain, SDValue Callee, CallFlags CFlags,
7401     const SmallVectorImpl<ISD::OutputArg> &Outs,
7402     const SmallVectorImpl<SDValue> &OutVals,
7403     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7404     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7405     const CallBase *CB) const {
7406   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7407   // AIX ABI stack frame layout.
7408 
7409   assert((CFlags.CallConv == CallingConv::C ||
7410           CFlags.CallConv == CallingConv::Cold ||
7411           CFlags.CallConv == CallingConv::Fast) &&
7412          "Unexpected calling convention!");
7413 
7414   if (CFlags.IsPatchPoint)
7415     report_fatal_error("This call type is unimplemented on AIX.");
7416 
7417   const PPCSubtarget& Subtarget =
7418       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7419   if (Subtarget.hasAltivec())
7420     report_fatal_error("Altivec support is unimplemented on AIX.");
7421 
7422   MachineFunction &MF = DAG.getMachineFunction();
7423   SmallVector<CCValAssign, 16> ArgLocs;
7424   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7425                  *DAG.getContext());
7426 
7427   // Reserve space for the linkage save area (LSA) on the stack.
7428   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7429   //   [SP][CR][LR][2 x reserved][TOC].
7430   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7431   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7432   const bool IsPPC64 = Subtarget.isPPC64();
7433   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7434   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7435   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7436   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7437 
7438   // The prolog code of the callee may store up to 8 GPR argument registers to
7439   // the stack, allowing va_start to index over them in memory if the callee
7440   // is variadic.
7441   // Because we cannot tell if this is needed on the caller side, we have to
7442   // conservatively assume that it is needed.  As such, make sure we have at
7443   // least enough stack space for the caller to store the 8 GPRs.
7444   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7445   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7446                                      CCInfo.getNextStackOffset());
7447 
7448   // Adjust the stack pointer for the new arguments...
7449   // These operations are automatically eliminated by the prolog/epilog pass.
7450   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7451   SDValue CallSeqStart = Chain;
7452 
7453   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7454   SmallVector<SDValue, 8> MemOpChains;
7455 
7456   // Set up a copy of the stack pointer for loading and storing any
7457   // arguments that may not fit in the registers available for argument
7458   // passing.
7459   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7460                                    : DAG.getRegister(PPC::R1, MVT::i32);
7461 
7462   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7463     const unsigned ValNo = ArgLocs[I].getValNo();
7464     SDValue Arg = OutVals[ValNo];
7465     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7466 
7467     if (Flags.isByVal()) {
7468       const unsigned ByValSize = Flags.getByValSize();
7469 
7470       // Nothing to do for zero-sized ByVals on the caller side.
7471       if (!ByValSize) {
7472         ++I;
7473         continue;
7474       }
7475 
7476       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7477         return DAG.getExtLoad(
7478             ISD::ZEXTLOAD, dl, PtrVT, Chain,
7479             (LoadOffset != 0)
7480                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7481                 : Arg,
7482             MachinePointerInfo(), VT);
7483       };
7484 
7485       unsigned LoadOffset = 0;
7486 
7487       // Initialize registers, which are fully occupied by the by-val argument.
7488       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7489         SDValue Load = GetLoad(PtrVT, LoadOffset);
7490         MemOpChains.push_back(Load.getValue(1));
7491         LoadOffset += PtrByteSize;
7492         const CCValAssign &ByValVA = ArgLocs[I++];
7493         assert(ByValVA.getValNo() == ValNo &&
7494                "Unexpected location for pass-by-value argument.");
7495         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7496       }
7497 
7498       if (LoadOffset == ByValSize)
7499         continue;
7500 
7501       // There must be one more loc to handle the remainder.
7502       assert(ArgLocs[I].getValNo() == ValNo &&
7503              "Expected additional location for by-value argument.");
7504 
7505       if (ArgLocs[I].isMemLoc()) {
7506         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7507         const CCValAssign &ByValVA = ArgLocs[I++];
7508         ISD::ArgFlagsTy MemcpyFlags = Flags;
7509         // Only memcpy the bytes that don't pass in register.
7510         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7511         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7512             (LoadOffset != 0)
7513                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7514                 : Arg,
7515             DAG.getObjectPtrOffset(dl, StackPtr,
7516                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
7517             CallSeqStart, MemcpyFlags, DAG, dl);
7518         continue;
7519       }
7520 
7521       // Initialize the final register residue.
7522       // Any residue that occupies the final by-val arg register must be
7523       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7524       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7525       // 2 and 1 byte loads.
7526       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7527       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7528              "Unexpected register residue for by-value argument.");
7529       SDValue ResidueVal;
7530       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7531         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7532         const MVT VT =
7533             N == 1 ? MVT::i8
7534                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7535         SDValue Load = GetLoad(VT, LoadOffset);
7536         MemOpChains.push_back(Load.getValue(1));
7537         LoadOffset += N;
7538         Bytes += N;
7539 
7540         // By-val arguments are passed left-justfied in register.
7541         // Every load here needs to be shifted, otherwise a full register load
7542         // should have been used.
7543         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7544                "Unexpected load emitted during handling of pass-by-value "
7545                "argument.");
7546         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7547         EVT ShiftAmountTy =
7548             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7549         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7550         SDValue ShiftedLoad =
7551             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7552         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7553                                               ShiftedLoad)
7554                                 : ShiftedLoad;
7555       }
7556 
7557       const CCValAssign &ByValVA = ArgLocs[I++];
7558       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7559       continue;
7560     }
7561 
7562     CCValAssign &VA = ArgLocs[I++];
7563     const MVT LocVT = VA.getLocVT();
7564     const MVT ValVT = VA.getValVT();
7565 
7566     switch (VA.getLocInfo()) {
7567     default:
7568       report_fatal_error("Unexpected argument extension type.");
7569     case CCValAssign::Full:
7570       break;
7571     case CCValAssign::ZExt:
7572       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7573       break;
7574     case CCValAssign::SExt:
7575       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7576       break;
7577     }
7578 
7579     if (VA.isRegLoc() && !VA.needsCustom()) {
7580       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7581       continue;
7582     }
7583 
7584     if (VA.isMemLoc()) {
7585       SDValue PtrOff =
7586           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7587       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7588       MemOpChains.push_back(
7589           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7590 
7591       continue;
7592     }
7593 
7594     // Custom handling is used for GPR initializations for vararg float
7595     // arguments.
7596     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7597            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7598            "Unexpected register handling for calling convention.");
7599 
7600     SDValue ArgAsInt =
7601         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7602 
7603     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7604       // f32 in 32-bit GPR
7605       // f64 in 64-bit GPR
7606       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7607     else if (Arg.getValueType().getFixedSizeInBits() <
7608              LocVT.getFixedSizeInBits())
7609       // f32 in 64-bit GPR.
7610       RegsToPass.push_back(std::make_pair(
7611           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7612     else {
7613       // f64 in two 32-bit GPRs
7614       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7615       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7616              "Unexpected custom register for argument!");
7617       CCValAssign &GPR1 = VA;
7618       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7619                                      DAG.getConstant(32, dl, MVT::i8));
7620       RegsToPass.push_back(std::make_pair(
7621           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7622 
7623       if (I != E) {
7624         // If only 1 GPR was available, there will only be one custom GPR and
7625         // the argument will also pass in memory.
7626         CCValAssign &PeekArg = ArgLocs[I];
7627         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7628           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7629           CCValAssign &GPR2 = ArgLocs[I++];
7630           RegsToPass.push_back(std::make_pair(
7631               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7632         }
7633       }
7634     }
7635   }
7636 
7637   if (!MemOpChains.empty())
7638     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7639 
7640   // For indirect calls, we need to save the TOC base to the stack for
7641   // restoration after the call.
7642   if (CFlags.IsIndirect) {
7643     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7644     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7645     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7646     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7647     const unsigned TOCSaveOffset =
7648         Subtarget.getFrameLowering()->getTOCSaveOffset();
7649 
7650     setUsesTOCBasePtr(DAG);
7651     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7652     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7653     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7654     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7655     Chain = DAG.getStore(
7656         Val.getValue(1), dl, Val, AddPtr,
7657         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7658   }
7659 
7660   // Build a sequence of copy-to-reg nodes chained together with token chain
7661   // and flag operands which copy the outgoing args into the appropriate regs.
7662   SDValue InFlag;
7663   for (auto Reg : RegsToPass) {
7664     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7665     InFlag = Chain.getValue(1);
7666   }
7667 
7668   const int SPDiff = 0;
7669   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7670                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7671 }
7672 
7673 bool
7674 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7675                                   MachineFunction &MF, bool isVarArg,
7676                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7677                                   LLVMContext &Context) const {
7678   SmallVector<CCValAssign, 16> RVLocs;
7679   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7680   return CCInfo.CheckReturn(
7681       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7682                 ? RetCC_PPC_Cold
7683                 : RetCC_PPC);
7684 }
7685 
7686 SDValue
7687 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7688                                bool isVarArg,
7689                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7690                                const SmallVectorImpl<SDValue> &OutVals,
7691                                const SDLoc &dl, SelectionDAG &DAG) const {
7692   SmallVector<CCValAssign, 16> RVLocs;
7693   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7694                  *DAG.getContext());
7695   CCInfo.AnalyzeReturn(Outs,
7696                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7697                            ? RetCC_PPC_Cold
7698                            : RetCC_PPC);
7699 
7700   SDValue Flag;
7701   SmallVector<SDValue, 4> RetOps(1, Chain);
7702 
7703   // Copy the result values into the output registers.
7704   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7705     CCValAssign &VA = RVLocs[i];
7706     assert(VA.isRegLoc() && "Can only return in registers!");
7707 
7708     SDValue Arg = OutVals[RealResIdx];
7709 
7710     if (Subtarget.isAIXABI() &&
7711         (VA.getLocVT().isVector() || VA.getValVT().isVector()))
7712       report_fatal_error("Returning vector types not yet supported on AIX.");
7713 
7714     switch (VA.getLocInfo()) {
7715     default: llvm_unreachable("Unknown loc info!");
7716     case CCValAssign::Full: break;
7717     case CCValAssign::AExt:
7718       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7719       break;
7720     case CCValAssign::ZExt:
7721       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7722       break;
7723     case CCValAssign::SExt:
7724       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7725       break;
7726     }
7727     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7728       bool isLittleEndian = Subtarget.isLittleEndian();
7729       // Legalize ret f64 -> ret 2 x i32.
7730       SDValue SVal =
7731           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7732                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7733       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7734       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7735       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7736                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7737       Flag = Chain.getValue(1);
7738       VA = RVLocs[++i]; // skip ahead to next loc
7739       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7740     } else
7741       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7742     Flag = Chain.getValue(1);
7743     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7744   }
7745 
7746   RetOps[0] = Chain;  // Update chain.
7747 
7748   // Add the flag if we have it.
7749   if (Flag.getNode())
7750     RetOps.push_back(Flag);
7751 
7752   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7753 }
7754 
7755 SDValue
7756 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7757                                                 SelectionDAG &DAG) const {
7758   SDLoc dl(Op);
7759 
7760   // Get the correct type for integers.
7761   EVT IntVT = Op.getValueType();
7762 
7763   // Get the inputs.
7764   SDValue Chain = Op.getOperand(0);
7765   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7766   // Build a DYNAREAOFFSET node.
7767   SDValue Ops[2] = {Chain, FPSIdx};
7768   SDVTList VTs = DAG.getVTList(IntVT);
7769   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7770 }
7771 
7772 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7773                                              SelectionDAG &DAG) const {
7774   // When we pop the dynamic allocation we need to restore the SP link.
7775   SDLoc dl(Op);
7776 
7777   // Get the correct type for pointers.
7778   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7779 
7780   // Construct the stack pointer operand.
7781   bool isPPC64 = Subtarget.isPPC64();
7782   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7783   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7784 
7785   // Get the operands for the STACKRESTORE.
7786   SDValue Chain = Op.getOperand(0);
7787   SDValue SaveSP = Op.getOperand(1);
7788 
7789   // Load the old link SP.
7790   SDValue LoadLinkSP =
7791       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7792 
7793   // Restore the stack pointer.
7794   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7795 
7796   // Store the old link SP.
7797   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7798 }
7799 
7800 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7801   MachineFunction &MF = DAG.getMachineFunction();
7802   bool isPPC64 = Subtarget.isPPC64();
7803   EVT PtrVT = getPointerTy(MF.getDataLayout());
7804 
7805   // Get current frame pointer save index.  The users of this index will be
7806   // primarily DYNALLOC instructions.
7807   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7808   int RASI = FI->getReturnAddrSaveIndex();
7809 
7810   // If the frame pointer save index hasn't been defined yet.
7811   if (!RASI) {
7812     // Find out what the fix offset of the frame pointer save area.
7813     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7814     // Allocate the frame index for frame pointer save area.
7815     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7816     // Save the result.
7817     FI->setReturnAddrSaveIndex(RASI);
7818   }
7819   return DAG.getFrameIndex(RASI, PtrVT);
7820 }
7821 
7822 SDValue
7823 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7824   MachineFunction &MF = DAG.getMachineFunction();
7825   bool isPPC64 = Subtarget.isPPC64();
7826   EVT PtrVT = getPointerTy(MF.getDataLayout());
7827 
7828   // Get current frame pointer save index.  The users of this index will be
7829   // primarily DYNALLOC instructions.
7830   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7831   int FPSI = FI->getFramePointerSaveIndex();
7832 
7833   // If the frame pointer save index hasn't been defined yet.
7834   if (!FPSI) {
7835     // Find out what the fix offset of the frame pointer save area.
7836     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7837     // Allocate the frame index for frame pointer save area.
7838     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7839     // Save the result.
7840     FI->setFramePointerSaveIndex(FPSI);
7841   }
7842   return DAG.getFrameIndex(FPSI, PtrVT);
7843 }
7844 
7845 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7846                                                    SelectionDAG &DAG) const {
7847   MachineFunction &MF = DAG.getMachineFunction();
7848   // Get the inputs.
7849   SDValue Chain = Op.getOperand(0);
7850   SDValue Size  = Op.getOperand(1);
7851   SDLoc dl(Op);
7852 
7853   // Get the correct type for pointers.
7854   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7855   // Negate the size.
7856   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7857                                 DAG.getConstant(0, dl, PtrVT), Size);
7858   // Construct a node for the frame pointer save index.
7859   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7860   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7861   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7862   if (hasInlineStackProbe(MF))
7863     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7864   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7865 }
7866 
7867 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7868                                                      SelectionDAG &DAG) const {
7869   MachineFunction &MF = DAG.getMachineFunction();
7870 
7871   bool isPPC64 = Subtarget.isPPC64();
7872   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7873 
7874   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7875   return DAG.getFrameIndex(FI, PtrVT);
7876 }
7877 
7878 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7879                                                SelectionDAG &DAG) const {
7880   SDLoc DL(Op);
7881   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7882                      DAG.getVTList(MVT::i32, MVT::Other),
7883                      Op.getOperand(0), Op.getOperand(1));
7884 }
7885 
7886 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7887                                                 SelectionDAG &DAG) const {
7888   SDLoc DL(Op);
7889   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7890                      Op.getOperand(0), Op.getOperand(1));
7891 }
7892 
7893 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7894   if (Op.getValueType().isVector())
7895     return LowerVectorLoad(Op, DAG);
7896 
7897   assert(Op.getValueType() == MVT::i1 &&
7898          "Custom lowering only for i1 loads");
7899 
7900   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7901 
7902   SDLoc dl(Op);
7903   LoadSDNode *LD = cast<LoadSDNode>(Op);
7904 
7905   SDValue Chain = LD->getChain();
7906   SDValue BasePtr = LD->getBasePtr();
7907   MachineMemOperand *MMO = LD->getMemOperand();
7908 
7909   SDValue NewLD =
7910       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7911                      BasePtr, MVT::i8, MMO);
7912   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7913 
7914   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7915   return DAG.getMergeValues(Ops, dl);
7916 }
7917 
7918 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7919   if (Op.getOperand(1).getValueType().isVector())
7920     return LowerVectorStore(Op, DAG);
7921 
7922   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7923          "Custom lowering only for i1 stores");
7924 
7925   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7926 
7927   SDLoc dl(Op);
7928   StoreSDNode *ST = cast<StoreSDNode>(Op);
7929 
7930   SDValue Chain = ST->getChain();
7931   SDValue BasePtr = ST->getBasePtr();
7932   SDValue Value = ST->getValue();
7933   MachineMemOperand *MMO = ST->getMemOperand();
7934 
7935   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7936                       Value);
7937   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7938 }
7939 
7940 // FIXME: Remove this once the ANDI glue bug is fixed:
7941 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7942   assert(Op.getValueType() == MVT::i1 &&
7943          "Custom lowering only for i1 results");
7944 
7945   SDLoc DL(Op);
7946   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7947 }
7948 
7949 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7950                                                SelectionDAG &DAG) const {
7951 
7952   // Implements a vector truncate that fits in a vector register as a shuffle.
7953   // We want to legalize vector truncates down to where the source fits in
7954   // a vector register (and target is therefore smaller than vector register
7955   // size).  At that point legalization will try to custom lower the sub-legal
7956   // result and get here - where we can contain the truncate as a single target
7957   // operation.
7958 
7959   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7960   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7961   //
7962   // We will implement it for big-endian ordering as this (where x denotes
7963   // undefined):
7964   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7965   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7966   //
7967   // The same operation in little-endian ordering will be:
7968   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7969   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7970 
7971   EVT TrgVT = Op.getValueType();
7972   assert(TrgVT.isVector() && "Vector type expected.");
7973   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7974   EVT EltVT = TrgVT.getVectorElementType();
7975   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7976       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7977       !isPowerOf2_32(EltVT.getSizeInBits()))
7978     return SDValue();
7979 
7980   SDValue N1 = Op.getOperand(0);
7981   EVT SrcVT = N1.getValueType();
7982   unsigned SrcSize = SrcVT.getSizeInBits();
7983   if (SrcSize > 256 ||
7984       !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7985       !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7986     return SDValue();
7987   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7988     return SDValue();
7989 
7990   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7991   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7992 
7993   SDLoc DL(Op);
7994   SDValue Op1, Op2;
7995   if (SrcSize == 256) {
7996     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7997     EVT SplitVT =
7998         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7999     unsigned SplitNumElts = SplitVT.getVectorNumElements();
8000     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8001                       DAG.getConstant(0, DL, VecIdxTy));
8002     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8003                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8004   }
8005   else {
8006     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8007     Op2 = DAG.getUNDEF(WideVT);
8008   }
8009 
8010   // First list the elements we want to keep.
8011   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8012   SmallVector<int, 16> ShuffV;
8013   if (Subtarget.isLittleEndian())
8014     for (unsigned i = 0; i < TrgNumElts; ++i)
8015       ShuffV.push_back(i * SizeMult);
8016   else
8017     for (unsigned i = 1; i <= TrgNumElts; ++i)
8018       ShuffV.push_back(i * SizeMult - 1);
8019 
8020   // Populate the remaining elements with undefs.
8021   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8022     // ShuffV.push_back(i + WideNumElts);
8023     ShuffV.push_back(WideNumElts + 1);
8024 
8025   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8026   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8027   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8028 }
8029 
8030 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8031 /// possible.
8032 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8033   // Not FP, or using SPE? Not a fsel.
8034   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
8035       !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
8036     return Op;
8037 
8038   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8039 
8040   EVT ResVT = Op.getValueType();
8041   EVT CmpVT = Op.getOperand(0).getValueType();
8042   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8043   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
8044   SDLoc dl(Op);
8045   SDNodeFlags Flags = Op.getNode()->getFlags();
8046 
8047   // We have xsmaxcdp/xsmincdp which are OK to emit even in the
8048   // presence of infinities.
8049   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8050     switch (CC) {
8051     default:
8052       break;
8053     case ISD::SETOGT:
8054     case ISD::SETGT:
8055       return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
8056     case ISD::SETOLT:
8057     case ISD::SETLT:
8058       return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
8059     }
8060   }
8061 
8062   // We might be able to do better than this under some circumstances, but in
8063   // general, fsel-based lowering of select is a finite-math-only optimization.
8064   // For more information, see section F.3 of the 2.06 ISA specification.
8065   // With ISA 3.0
8066   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8067       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8068     return Op;
8069 
8070   // If the RHS of the comparison is a 0.0, we don't need to do the
8071   // subtraction at all.
8072   SDValue Sel1;
8073   if (isFloatingPointZero(RHS))
8074     switch (CC) {
8075     default: break;       // SETUO etc aren't handled by fsel.
8076     case ISD::SETNE:
8077       std::swap(TV, FV);
8078       LLVM_FALLTHROUGH;
8079     case ISD::SETEQ:
8080       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8081         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8082       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8083       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8084         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8085       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8086                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8087     case ISD::SETULT:
8088     case ISD::SETLT:
8089       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8090       LLVM_FALLTHROUGH;
8091     case ISD::SETOGE:
8092     case ISD::SETGE:
8093       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8094         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8095       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8096     case ISD::SETUGT:
8097     case ISD::SETGT:
8098       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8099       LLVM_FALLTHROUGH;
8100     case ISD::SETOLE:
8101     case ISD::SETLE:
8102       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8103         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8104       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8105                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8106     }
8107 
8108   SDValue Cmp;
8109   switch (CC) {
8110   default: break;       // SETUO etc aren't handled by fsel.
8111   case ISD::SETNE:
8112     std::swap(TV, FV);
8113     LLVM_FALLTHROUGH;
8114   case ISD::SETEQ:
8115     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8116     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8117       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8118     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8119     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8120       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8121     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8122                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8123   case ISD::SETULT:
8124   case ISD::SETLT:
8125     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8126     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8127       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8128     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8129   case ISD::SETOGE:
8130   case ISD::SETGE:
8131     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8132     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8133       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8134     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8135   case ISD::SETUGT:
8136   case ISD::SETGT:
8137     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8138     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8139       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8140     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8141   case ISD::SETOLE:
8142   case ISD::SETLE:
8143     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8144     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8145       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8146     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8147   }
8148   return Op;
8149 }
8150 
8151 static unsigned getPPCStrictOpcode(unsigned Opc) {
8152   switch (Opc) {
8153   default:
8154     llvm_unreachable("No strict version of this opcode!");
8155   case PPCISD::FCTIDZ:
8156     return PPCISD::STRICT_FCTIDZ;
8157   case PPCISD::FCTIWZ:
8158     return PPCISD::STRICT_FCTIWZ;
8159   case PPCISD::FCTIDUZ:
8160     return PPCISD::STRICT_FCTIDUZ;
8161   case PPCISD::FCTIWUZ:
8162     return PPCISD::STRICT_FCTIWUZ;
8163   case PPCISD::FCFID:
8164     return PPCISD::STRICT_FCFID;
8165   case PPCISD::FCFIDU:
8166     return PPCISD::STRICT_FCFIDU;
8167   case PPCISD::FCFIDS:
8168     return PPCISD::STRICT_FCFIDS;
8169   case PPCISD::FCFIDUS:
8170     return PPCISD::STRICT_FCFIDUS;
8171   }
8172 }
8173 
8174 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8175                               const PPCSubtarget &Subtarget) {
8176   SDLoc dl(Op);
8177   bool IsStrict = Op->isStrictFPOpcode();
8178   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8179                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8180 
8181   // TODO: Any other flags to propagate?
8182   SDNodeFlags Flags;
8183   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8184 
8185   // For strict nodes, source is the second operand.
8186   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8187   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8188   assert(Src.getValueType().isFloatingPoint());
8189   if (Src.getValueType() == MVT::f32) {
8190     if (IsStrict) {
8191       Src =
8192           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8193                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8194       Chain = Src.getValue(1);
8195     } else
8196       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8197   }
8198   SDValue Conv;
8199   unsigned Opc = ISD::DELETED_NODE;
8200   switch (Op.getSimpleValueType().SimpleTy) {
8201   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8202   case MVT::i32:
8203     Opc = IsSigned ? PPCISD::FCTIWZ
8204                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8205     break;
8206   case MVT::i64:
8207     assert((IsSigned || Subtarget.hasFPCVT()) &&
8208            "i64 FP_TO_UINT is supported only with FPCVT");
8209     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8210   }
8211   if (IsStrict) {
8212     Opc = getPPCStrictOpcode(Opc);
8213     Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8214                        {Chain, Src}, Flags);
8215   } else {
8216     Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8217   }
8218   return Conv;
8219 }
8220 
8221 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8222                                                SelectionDAG &DAG,
8223                                                const SDLoc &dl) const {
8224   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8225   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8226                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8227   bool IsStrict = Op->isStrictFPOpcode();
8228 
8229   // Convert the FP value to an int value through memory.
8230   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8231                   (IsSigned || Subtarget.hasFPCVT());
8232   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8233   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8234   MachinePointerInfo MPI =
8235       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8236 
8237   // Emit a store to the stack slot.
8238   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8239   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8240   if (i32Stack) {
8241     MachineFunction &MF = DAG.getMachineFunction();
8242     Alignment = Align(4);
8243     MachineMemOperand *MMO =
8244         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8245     SDValue Ops[] = { Chain, Tmp, FIPtr };
8246     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8247               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8248   } else
8249     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8250 
8251   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
8252   // add in a bias on big endian.
8253   if (Op.getValueType() == MVT::i32 && !i32Stack) {
8254     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8255                         DAG.getConstant(4, dl, FIPtr.getValueType()));
8256     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8257   }
8258 
8259   RLI.Chain = Chain;
8260   RLI.Ptr = FIPtr;
8261   RLI.MPI = MPI;
8262   RLI.Alignment = Alignment;
8263 }
8264 
8265 /// Custom lowers floating point to integer conversions to use
8266 /// the direct move instructions available in ISA 2.07 to avoid the
8267 /// need for load/store combinations.
8268 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8269                                                     SelectionDAG &DAG,
8270                                                     const SDLoc &dl) const {
8271   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8272   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8273   if (Op->isStrictFPOpcode())
8274     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8275   else
8276     return Mov;
8277 }
8278 
8279 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8280                                           const SDLoc &dl) const {
8281   bool IsStrict = Op->isStrictFPOpcode();
8282   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8283                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8284   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8285   EVT SrcVT = Src.getValueType();
8286   EVT DstVT = Op.getValueType();
8287 
8288   // FP to INT conversions are legal for f128.
8289   if (SrcVT == MVT::f128)
8290     return Op;
8291 
8292   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8293   // PPC (the libcall is not available).
8294   if (SrcVT == MVT::ppcf128) {
8295     if (DstVT == MVT::i32) {
8296       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8297       // set other fast-math flags to FP operations in both strict and
8298       // non-strict cases. (FP_TO_SINT, FSUB)
8299       SDNodeFlags Flags;
8300       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8301 
8302       if (IsSigned) {
8303         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8304                                  DAG.getIntPtrConstant(0, dl));
8305         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8306                                  DAG.getIntPtrConstant(1, dl));
8307 
8308         // Add the two halves of the long double in round-to-zero mode, and use
8309         // a smaller FP_TO_SINT.
8310         if (IsStrict) {
8311           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8312                                     DAG.getVTList(MVT::f64, MVT::Other),
8313                                     {Op.getOperand(0), Lo, Hi}, Flags);
8314           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8315                              DAG.getVTList(MVT::i32, MVT::Other),
8316                              {Res.getValue(1), Res}, Flags);
8317         } else {
8318           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8319           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8320         }
8321       } else {
8322         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8323         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8324         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8325         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8326         if (IsStrict) {
8327           // Sel = Src < 0x80000000
8328           // FltOfs = select Sel, 0.0, 0x80000000
8329           // IntOfs = select Sel, 0, 0x80000000
8330           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8331           SDValue Chain = Op.getOperand(0);
8332           EVT SetCCVT =
8333               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8334           EVT DstSetCCVT =
8335               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8336           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8337                                      Chain, true);
8338           Chain = Sel.getValue(1);
8339 
8340           SDValue FltOfs = DAG.getSelect(
8341               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8342           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8343 
8344           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8345                                     DAG.getVTList(SrcVT, MVT::Other),
8346                                     {Chain, Src, FltOfs}, Flags);
8347           Chain = Val.getValue(1);
8348           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8349                                      DAG.getVTList(DstVT, MVT::Other),
8350                                      {Chain, Val}, Flags);
8351           Chain = SInt.getValue(1);
8352           SDValue IntOfs = DAG.getSelect(
8353               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8354           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8355           return DAG.getMergeValues({Result, Chain}, dl);
8356         } else {
8357           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8358           // FIXME: generated code sucks.
8359           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8360           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8361           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8362           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8363           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8364         }
8365       }
8366     }
8367 
8368     return SDValue();
8369   }
8370 
8371   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8372     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8373 
8374   ReuseLoadInfo RLI;
8375   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8376 
8377   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8378                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8379 }
8380 
8381 // We're trying to insert a regular store, S, and then a load, L. If the
8382 // incoming value, O, is a load, we might just be able to have our load use the
8383 // address used by O. However, we don't know if anything else will store to
8384 // that address before we can load from it. To prevent this situation, we need
8385 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8386 // the same chain operand as O, we create a token factor from the chain results
8387 // of O and L, and we replace all uses of O's chain result with that token
8388 // factor (see spliceIntoChain below for this last part).
8389 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8390                                             ReuseLoadInfo &RLI,
8391                                             SelectionDAG &DAG,
8392                                             ISD::LoadExtType ET) const {
8393   // Conservatively skip reusing for constrained FP nodes.
8394   if (Op->isStrictFPOpcode())
8395     return false;
8396 
8397   SDLoc dl(Op);
8398   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8399                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8400   if (ET == ISD::NON_EXTLOAD &&
8401       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8402       isOperationLegalOrCustom(Op.getOpcode(),
8403                                Op.getOperand(0).getValueType())) {
8404 
8405     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8406     return true;
8407   }
8408 
8409   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8410   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8411       LD->isNonTemporal())
8412     return false;
8413   if (LD->getMemoryVT() != MemVT)
8414     return false;
8415 
8416   RLI.Ptr = LD->getBasePtr();
8417   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8418     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8419            "Non-pre-inc AM on PPC?");
8420     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8421                           LD->getOffset());
8422   }
8423 
8424   RLI.Chain = LD->getChain();
8425   RLI.MPI = LD->getPointerInfo();
8426   RLI.IsDereferenceable = LD->isDereferenceable();
8427   RLI.IsInvariant = LD->isInvariant();
8428   RLI.Alignment = LD->getAlign();
8429   RLI.AAInfo = LD->getAAInfo();
8430   RLI.Ranges = LD->getRanges();
8431 
8432   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8433   return true;
8434 }
8435 
8436 // Given the head of the old chain, ResChain, insert a token factor containing
8437 // it and NewResChain, and make users of ResChain now be users of that token
8438 // factor.
8439 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8440 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8441                                         SDValue NewResChain,
8442                                         SelectionDAG &DAG) const {
8443   if (!ResChain)
8444     return;
8445 
8446   SDLoc dl(NewResChain);
8447 
8448   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8449                            NewResChain, DAG.getUNDEF(MVT::Other));
8450   assert(TF.getNode() != NewResChain.getNode() &&
8451          "A new TF really is required here");
8452 
8453   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8454   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8455 }
8456 
8457 /// Analyze profitability of direct move
8458 /// prefer float load to int load plus direct move
8459 /// when there is no integer use of int load
8460 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8461   SDNode *Origin = Op.getOperand(0).getNode();
8462   if (Origin->getOpcode() != ISD::LOAD)
8463     return true;
8464 
8465   // If there is no LXSIBZX/LXSIHZX, like Power8,
8466   // prefer direct move if the memory size is 1 or 2 bytes.
8467   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8468   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8469     return true;
8470 
8471   for (SDNode::use_iterator UI = Origin->use_begin(),
8472                             UE = Origin->use_end();
8473        UI != UE; ++UI) {
8474 
8475     // Only look at the users of the loaded value.
8476     if (UI.getUse().get().getResNo() != 0)
8477       continue;
8478 
8479     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8480         UI->getOpcode() != ISD::UINT_TO_FP &&
8481         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8482         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8483       return true;
8484   }
8485 
8486   return false;
8487 }
8488 
8489 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8490                               const PPCSubtarget &Subtarget,
8491                               SDValue Chain = SDValue()) {
8492   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8493                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8494   SDLoc dl(Op);
8495 
8496   // TODO: Any other flags to propagate?
8497   SDNodeFlags Flags;
8498   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8499 
8500   // If we have FCFIDS, then use it when converting to single-precision.
8501   // Otherwise, convert to double-precision and then round.
8502   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8503   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8504                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8505   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8506   if (Op->isStrictFPOpcode()) {
8507     if (!Chain)
8508       Chain = Op.getOperand(0);
8509     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8510                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8511   } else
8512     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8513 }
8514 
8515 /// Custom lowers integer to floating point conversions to use
8516 /// the direct move instructions available in ISA 2.07 to avoid the
8517 /// need for load/store combinations.
8518 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8519                                                     SelectionDAG &DAG,
8520                                                     const SDLoc &dl) const {
8521   assert((Op.getValueType() == MVT::f32 ||
8522           Op.getValueType() == MVT::f64) &&
8523          "Invalid floating point type as target of conversion");
8524   assert(Subtarget.hasFPCVT() &&
8525          "Int to FP conversions with direct moves require FPCVT");
8526   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8527   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8528   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8529                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8530   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8531   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8532   return convertIntToFP(Op, Mov, DAG, Subtarget);
8533 }
8534 
8535 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8536 
8537   EVT VecVT = Vec.getValueType();
8538   assert(VecVT.isVector() && "Expected a vector type.");
8539   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8540 
8541   EVT EltVT = VecVT.getVectorElementType();
8542   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8543   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8544 
8545   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8546   SmallVector<SDValue, 16> Ops(NumConcat);
8547   Ops[0] = Vec;
8548   SDValue UndefVec = DAG.getUNDEF(VecVT);
8549   for (unsigned i = 1; i < NumConcat; ++i)
8550     Ops[i] = UndefVec;
8551 
8552   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8553 }
8554 
8555 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8556                                                 const SDLoc &dl) const {
8557   bool IsStrict = Op->isStrictFPOpcode();
8558   unsigned Opc = Op.getOpcode();
8559   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8560   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8561           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8562          "Unexpected conversion type");
8563   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8564          "Supports conversions to v2f64/v4f32 only.");
8565 
8566   // TODO: Any other flags to propagate?
8567   SDNodeFlags Flags;
8568   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8569 
8570   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8571   bool FourEltRes = Op.getValueType() == MVT::v4f32;
8572 
8573   SDValue Wide = widenVec(DAG, Src, dl);
8574   EVT WideVT = Wide.getValueType();
8575   unsigned WideNumElts = WideVT.getVectorNumElements();
8576   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8577 
8578   SmallVector<int, 16> ShuffV;
8579   for (unsigned i = 0; i < WideNumElts; ++i)
8580     ShuffV.push_back(i + WideNumElts);
8581 
8582   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8583   int SaveElts = FourEltRes ? 4 : 2;
8584   if (Subtarget.isLittleEndian())
8585     for (int i = 0; i < SaveElts; i++)
8586       ShuffV[i * Stride] = i;
8587   else
8588     for (int i = 1; i <= SaveElts; i++)
8589       ShuffV[i * Stride - 1] = i - 1;
8590 
8591   SDValue ShuffleSrc2 =
8592       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8593   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8594 
8595   SDValue Extend;
8596   if (SignedConv) {
8597     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8598     EVT ExtVT = Src.getValueType();
8599     if (Subtarget.hasP9Altivec())
8600       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8601                                IntermediateVT.getVectorNumElements());
8602 
8603     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8604                          DAG.getValueType(ExtVT));
8605   } else
8606     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8607 
8608   if (IsStrict)
8609     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8610                        {Op.getOperand(0), Extend}, Flags);
8611 
8612   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8613 }
8614 
8615 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8616                                           SelectionDAG &DAG) const {
8617   SDLoc dl(Op);
8618   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8619                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8620   bool IsStrict = Op->isStrictFPOpcode();
8621   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8622   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8623 
8624   // TODO: Any other flags to propagate?
8625   SDNodeFlags Flags;
8626   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8627 
8628   EVT InVT = Src.getValueType();
8629   EVT OutVT = Op.getValueType();
8630   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8631       isOperationCustom(Op.getOpcode(), InVT))
8632     return LowerINT_TO_FPVector(Op, DAG, dl);
8633 
8634   // Conversions to f128 are legal.
8635   if (Op.getValueType() == MVT::f128)
8636     return Op;
8637 
8638   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8639   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8640     return SDValue();
8641 
8642   if (Src.getValueType() == MVT::i1)
8643     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8644                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
8645                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
8646 
8647   // If we have direct moves, we can do all the conversion, skip the store/load
8648   // however, without FPCVT we can't do most conversions.
8649   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8650       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8651     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8652 
8653   assert((IsSigned || Subtarget.hasFPCVT()) &&
8654          "UINT_TO_FP is supported only with FPCVT");
8655 
8656   if (Src.getValueType() == MVT::i64) {
8657     SDValue SINT = Src;
8658     // When converting to single-precision, we actually need to convert
8659     // to double-precision first and then round to single-precision.
8660     // To avoid double-rounding effects during that operation, we have
8661     // to prepare the input operand.  Bits that might be truncated when
8662     // converting to double-precision are replaced by a bit that won't
8663     // be lost at this stage, but is below the single-precision rounding
8664     // position.
8665     //
8666     // However, if -enable-unsafe-fp-math is in effect, accept double
8667     // rounding to avoid the extra overhead.
8668     if (Op.getValueType() == MVT::f32 &&
8669         !Subtarget.hasFPCVT() &&
8670         !DAG.getTarget().Options.UnsafeFPMath) {
8671 
8672       // Twiddle input to make sure the low 11 bits are zero.  (If this
8673       // is the case, we are guaranteed the value will fit into the 53 bit
8674       // mantissa of an IEEE double-precision value without rounding.)
8675       // If any of those low 11 bits were not zero originally, make sure
8676       // bit 12 (value 2048) is set instead, so that the final rounding
8677       // to single-precision gets the correct result.
8678       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8679                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8680       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8681                           Round, DAG.getConstant(2047, dl, MVT::i64));
8682       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8683       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8684                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8685 
8686       // However, we cannot use that value unconditionally: if the magnitude
8687       // of the input value is small, the bit-twiddling we did above might
8688       // end up visibly changing the output.  Fortunately, in that case, we
8689       // don't need to twiddle bits since the original input will convert
8690       // exactly to double-precision floating-point already.  Therefore,
8691       // construct a conditional to use the original value if the top 11
8692       // bits are all sign-bit copies, and use the rounded value computed
8693       // above otherwise.
8694       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8695                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8696       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8697                          Cond, DAG.getConstant(1, dl, MVT::i64));
8698       Cond = DAG.getSetCC(
8699           dl,
8700           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8701           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8702 
8703       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8704     }
8705 
8706     ReuseLoadInfo RLI;
8707     SDValue Bits;
8708 
8709     MachineFunction &MF = DAG.getMachineFunction();
8710     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8711       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8712                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8713       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8714     } else if (Subtarget.hasLFIWAX() &&
8715                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8716       MachineMemOperand *MMO =
8717         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8718                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8719       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8720       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8721                                      DAG.getVTList(MVT::f64, MVT::Other),
8722                                      Ops, MVT::i32, MMO);
8723       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8724     } else if (Subtarget.hasFPCVT() &&
8725                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8726       MachineMemOperand *MMO =
8727         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8728                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8729       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8730       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8731                                      DAG.getVTList(MVT::f64, MVT::Other),
8732                                      Ops, MVT::i32, MMO);
8733       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8734     } else if (((Subtarget.hasLFIWAX() &&
8735                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8736                 (Subtarget.hasFPCVT() &&
8737                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8738                SINT.getOperand(0).getValueType() == MVT::i32) {
8739       MachineFrameInfo &MFI = MF.getFrameInfo();
8740       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8741 
8742       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8743       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8744 
8745       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8746                                    MachinePointerInfo::getFixedStack(
8747                                        DAG.getMachineFunction(), FrameIdx));
8748       Chain = Store;
8749 
8750       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8751              "Expected an i32 store");
8752 
8753       RLI.Ptr = FIdx;
8754       RLI.Chain = Chain;
8755       RLI.MPI =
8756           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8757       RLI.Alignment = Align(4);
8758 
8759       MachineMemOperand *MMO =
8760         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8761                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8762       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8763       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8764                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8765                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8766                                      Ops, MVT::i32, MMO);
8767       Chain = Bits.getValue(1);
8768     } else
8769       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8770 
8771     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8772     if (IsStrict)
8773       Chain = FP.getValue(1);
8774 
8775     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8776       if (IsStrict)
8777         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8778                          DAG.getVTList(MVT::f32, MVT::Other),
8779                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8780       else
8781         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8782                          DAG.getIntPtrConstant(0, dl));
8783     }
8784     return FP;
8785   }
8786 
8787   assert(Src.getValueType() == MVT::i32 &&
8788          "Unhandled INT_TO_FP type in custom expander!");
8789   // Since we only generate this in 64-bit mode, we can take advantage of
8790   // 64-bit registers.  In particular, sign extend the input value into the
8791   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8792   // then lfd it and fcfid it.
8793   MachineFunction &MF = DAG.getMachineFunction();
8794   MachineFrameInfo &MFI = MF.getFrameInfo();
8795   EVT PtrVT = getPointerTy(MF.getDataLayout());
8796 
8797   SDValue Ld;
8798   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8799     ReuseLoadInfo RLI;
8800     bool ReusingLoad;
8801     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8802       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8803       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8804 
8805       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8806                                    MachinePointerInfo::getFixedStack(
8807                                        DAG.getMachineFunction(), FrameIdx));
8808       Chain = Store;
8809 
8810       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8811              "Expected an i32 store");
8812 
8813       RLI.Ptr = FIdx;
8814       RLI.Chain = Chain;
8815       RLI.MPI =
8816           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8817       RLI.Alignment = Align(4);
8818     }
8819 
8820     MachineMemOperand *MMO =
8821       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8822                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8823     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8824     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8825                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8826                                  MVT::i32, MMO);
8827     Chain = Ld.getValue(1);
8828     if (ReusingLoad)
8829       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8830   } else {
8831     assert(Subtarget.isPPC64() &&
8832            "i32->FP without LFIWAX supported only on PPC64");
8833 
8834     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8835     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8836 
8837     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8838 
8839     // STD the extended value into the stack slot.
8840     SDValue Store = DAG.getStore(
8841         Chain, dl, Ext64, FIdx,
8842         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8843     Chain = Store;
8844 
8845     // Load the value as a double.
8846     Ld = DAG.getLoad(
8847         MVT::f64, dl, Chain, FIdx,
8848         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8849     Chain = Ld.getValue(1);
8850   }
8851 
8852   // FCFID it and return it.
8853   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8854   if (IsStrict)
8855     Chain = FP.getValue(1);
8856   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8857     if (IsStrict)
8858       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8859                        DAG.getVTList(MVT::f32, MVT::Other),
8860                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8861     else
8862       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8863                        DAG.getIntPtrConstant(0, dl));
8864   }
8865   return FP;
8866 }
8867 
8868 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8869                                             SelectionDAG &DAG) const {
8870   SDLoc dl(Op);
8871   /*
8872    The rounding mode is in bits 30:31 of FPSR, and has the following
8873    settings:
8874      00 Round to nearest
8875      01 Round to 0
8876      10 Round to +inf
8877      11 Round to -inf
8878 
8879   FLT_ROUNDS, on the other hand, expects the following:
8880     -1 Undefined
8881      0 Round to 0
8882      1 Round to nearest
8883      2 Round to +inf
8884      3 Round to -inf
8885 
8886   To perform the conversion, we do:
8887     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8888   */
8889 
8890   MachineFunction &MF = DAG.getMachineFunction();
8891   EVT VT = Op.getValueType();
8892   EVT PtrVT = getPointerTy(MF.getDataLayout());
8893 
8894   // Save FP Control Word to register
8895   SDValue Chain = Op.getOperand(0);
8896   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8897   Chain = MFFS.getValue(1);
8898 
8899   // Save FP register to stack slot
8900   int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8901   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8902   Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8903 
8904   // Load FP Control Word from low 32 bits of stack slot.
8905   SDValue Four = DAG.getConstant(4, dl, PtrVT);
8906   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8907   SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8908   Chain = CWD.getValue(1);
8909 
8910   // Transform as necessary
8911   SDValue CWD1 =
8912     DAG.getNode(ISD::AND, dl, MVT::i32,
8913                 CWD, DAG.getConstant(3, dl, MVT::i32));
8914   SDValue CWD2 =
8915     DAG.getNode(ISD::SRL, dl, MVT::i32,
8916                 DAG.getNode(ISD::AND, dl, MVT::i32,
8917                             DAG.getNode(ISD::XOR, dl, MVT::i32,
8918                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
8919                             DAG.getConstant(3, dl, MVT::i32)),
8920                 DAG.getConstant(1, dl, MVT::i32));
8921 
8922   SDValue RetVal =
8923     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8924 
8925   RetVal =
8926       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8927                   dl, VT, RetVal);
8928 
8929   return DAG.getMergeValues({RetVal, Chain}, dl);
8930 }
8931 
8932 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8933   EVT VT = Op.getValueType();
8934   unsigned BitWidth = VT.getSizeInBits();
8935   SDLoc dl(Op);
8936   assert(Op.getNumOperands() == 3 &&
8937          VT == Op.getOperand(1).getValueType() &&
8938          "Unexpected SHL!");
8939 
8940   // Expand into a bunch of logical ops.  Note that these ops
8941   // depend on the PPC behavior for oversized shift amounts.
8942   SDValue Lo = Op.getOperand(0);
8943   SDValue Hi = Op.getOperand(1);
8944   SDValue Amt = Op.getOperand(2);
8945   EVT AmtVT = Amt.getValueType();
8946 
8947   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8948                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8949   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8950   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8951   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8952   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8953                              DAG.getConstant(-BitWidth, dl, AmtVT));
8954   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8955   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8956   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8957   SDValue OutOps[] = { OutLo, OutHi };
8958   return DAG.getMergeValues(OutOps, dl);
8959 }
8960 
8961 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8962   EVT VT = Op.getValueType();
8963   SDLoc dl(Op);
8964   unsigned BitWidth = VT.getSizeInBits();
8965   assert(Op.getNumOperands() == 3 &&
8966          VT == Op.getOperand(1).getValueType() &&
8967          "Unexpected SRL!");
8968 
8969   // Expand into a bunch of logical ops.  Note that these ops
8970   // depend on the PPC behavior for oversized shift amounts.
8971   SDValue Lo = Op.getOperand(0);
8972   SDValue Hi = Op.getOperand(1);
8973   SDValue Amt = Op.getOperand(2);
8974   EVT AmtVT = Amt.getValueType();
8975 
8976   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8977                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8978   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8979   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8980   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8981   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8982                              DAG.getConstant(-BitWidth, dl, AmtVT));
8983   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8984   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8985   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8986   SDValue OutOps[] = { OutLo, OutHi };
8987   return DAG.getMergeValues(OutOps, dl);
8988 }
8989 
8990 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8991   SDLoc dl(Op);
8992   EVT VT = Op.getValueType();
8993   unsigned BitWidth = VT.getSizeInBits();
8994   assert(Op.getNumOperands() == 3 &&
8995          VT == Op.getOperand(1).getValueType() &&
8996          "Unexpected SRA!");
8997 
8998   // Expand into a bunch of logical ops, followed by a select_cc.
8999   SDValue Lo = Op.getOperand(0);
9000   SDValue Hi = Op.getOperand(1);
9001   SDValue Amt = Op.getOperand(2);
9002   EVT AmtVT = Amt.getValueType();
9003 
9004   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9005                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9006   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9007   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9008   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9009   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9010                              DAG.getConstant(-BitWidth, dl, AmtVT));
9011   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9012   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9013   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9014                                   Tmp4, Tmp6, ISD::SETLE);
9015   SDValue OutOps[] = { OutLo, OutHi };
9016   return DAG.getMergeValues(OutOps, dl);
9017 }
9018 
9019 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9020                                             SelectionDAG &DAG) const {
9021   SDLoc dl(Op);
9022   EVT VT = Op.getValueType();
9023   unsigned BitWidth = VT.getSizeInBits();
9024 
9025   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9026   SDValue X = Op.getOperand(0);
9027   SDValue Y = Op.getOperand(1);
9028   SDValue Z = Op.getOperand(2);
9029   EVT AmtVT = Z.getValueType();
9030 
9031   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9032   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9033   // This is simpler than TargetLowering::expandFunnelShift because we can rely
9034   // on PowerPC shift by BW being well defined.
9035   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9036                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
9037   SDValue SubZ =
9038       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9039   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9040   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9041   return DAG.getNode(ISD::OR, dl, VT, X, Y);
9042 }
9043 
9044 //===----------------------------------------------------------------------===//
9045 // Vector related lowering.
9046 //
9047 
9048 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9049 /// element size of SplatSize. Cast the result to VT.
9050 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9051                                       SelectionDAG &DAG, const SDLoc &dl) {
9052   static const MVT VTys[] = { // canonical VT to use for each size.
9053     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9054   };
9055 
9056   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9057 
9058   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9059   if (Val == ((1LU << (SplatSize * 8)) - 1)) {
9060     SplatSize = 1;
9061     Val = 0xFF;
9062   }
9063 
9064   EVT CanonicalVT = VTys[SplatSize-1];
9065 
9066   // Build a canonical splat for this value.
9067   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9068 }
9069 
9070 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9071 /// specified intrinsic ID.
9072 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9073                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9074   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9075   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9076                      DAG.getConstant(IID, dl, MVT::i32), Op);
9077 }
9078 
9079 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9080 /// specified intrinsic ID.
9081 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9082                                 SelectionDAG &DAG, const SDLoc &dl,
9083                                 EVT DestVT = MVT::Other) {
9084   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9085   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9086                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9087 }
9088 
9089 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9090 /// specified intrinsic ID.
9091 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9092                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9093                                 EVT DestVT = MVT::Other) {
9094   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9095   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9096                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9097 }
9098 
9099 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9100 /// amount.  The result has the specified value type.
9101 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9102                            SelectionDAG &DAG, const SDLoc &dl) {
9103   // Force LHS/RHS to be the right type.
9104   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9105   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9106 
9107   int Ops[16];
9108   for (unsigned i = 0; i != 16; ++i)
9109     Ops[i] = i + Amt;
9110   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9111   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9112 }
9113 
9114 /// Do we have an efficient pattern in a .td file for this node?
9115 ///
9116 /// \param V - pointer to the BuildVectorSDNode being matched
9117 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9118 ///
9119 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9120 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9121 /// the opposite is true (expansion is beneficial) are:
9122 /// - The node builds a vector out of integers that are not 32 or 64-bits
9123 /// - The node builds a vector out of constants
9124 /// - The node is a "load-and-splat"
9125 /// In all other cases, we will choose to keep the BUILD_VECTOR.
9126 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9127                                             bool HasDirectMove,
9128                                             bool HasP8Vector) {
9129   EVT VecVT = V->getValueType(0);
9130   bool RightType = VecVT == MVT::v2f64 ||
9131     (HasP8Vector && VecVT == MVT::v4f32) ||
9132     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9133   if (!RightType)
9134     return false;
9135 
9136   bool IsSplat = true;
9137   bool IsLoad = false;
9138   SDValue Op0 = V->getOperand(0);
9139 
9140   // This function is called in a block that confirms the node is not a constant
9141   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9142   // different constants.
9143   if (V->isConstant())
9144     return false;
9145   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9146     if (V->getOperand(i).isUndef())
9147       return false;
9148     // We want to expand nodes that represent load-and-splat even if the
9149     // loaded value is a floating point truncation or conversion to int.
9150     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9151         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9152          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9153         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9154          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9155         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9156          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9157       IsLoad = true;
9158     // If the operands are different or the input is not a load and has more
9159     // uses than just this BV node, then it isn't a splat.
9160     if (V->getOperand(i) != Op0 ||
9161         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9162       IsSplat = false;
9163   }
9164   return !(IsSplat && IsLoad);
9165 }
9166 
9167 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9168 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9169 
9170   SDLoc dl(Op);
9171   SDValue Op0 = Op->getOperand(0);
9172 
9173   if ((Op.getValueType() != MVT::f128) ||
9174       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9175       (Op0.getOperand(0).getValueType() != MVT::i64) ||
9176       (Op0.getOperand(1).getValueType() != MVT::i64))
9177     return SDValue();
9178 
9179   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9180                      Op0.getOperand(1));
9181 }
9182 
9183 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9184   const SDValue *InputLoad = &Op;
9185   if (InputLoad->getOpcode() == ISD::BITCAST)
9186     InputLoad = &InputLoad->getOperand(0);
9187   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9188       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9189     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9190     InputLoad = &InputLoad->getOperand(0);
9191   }
9192   if (InputLoad->getOpcode() != ISD::LOAD)
9193     return nullptr;
9194   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9195   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9196 }
9197 
9198 // Convert the argument APFloat to a single precision APFloat if there is no
9199 // loss in information during the conversion to single precision APFloat and the
9200 // resulting number is not a denormal number. Return true if successful.
9201 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9202   APFloat APFloatToConvert = ArgAPFloat;
9203   bool LosesInfo = true;
9204   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9205                            &LosesInfo);
9206   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9207   if (Success)
9208     ArgAPFloat = APFloatToConvert;
9209   return Success;
9210 }
9211 
9212 // Bitcast the argument APInt to a double and convert it to a single precision
9213 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9214 // argument if there is no loss in information during the conversion from
9215 // double to single precision APFloat and the resulting number is not a denormal
9216 // number. Return true if successful.
9217 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9218   double DpValue = ArgAPInt.bitsToDouble();
9219   APFloat APFloatDp(DpValue);
9220   bool Success = convertToNonDenormSingle(APFloatDp);
9221   if (Success)
9222     ArgAPInt = APFloatDp.bitcastToAPInt();
9223   return Success;
9224 }
9225 
9226 // If this is a case we can't handle, return null and let the default
9227 // expansion code take care of it.  If we CAN select this case, and if it
9228 // selects to a single instruction, return Op.  Otherwise, if we can codegen
9229 // this case more efficiently than a constant pool load, lower it to the
9230 // sequence of ops that should be used.
9231 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9232                                              SelectionDAG &DAG) const {
9233   SDLoc dl(Op);
9234   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9235   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9236 
9237   // Check if this is a splat of a constant value.
9238   APInt APSplatBits, APSplatUndef;
9239   unsigned SplatBitSize;
9240   bool HasAnyUndefs;
9241   bool BVNIsConstantSplat =
9242       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9243                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9244 
9245   // If it is a splat of a double, check if we can shrink it to a 32 bit
9246   // non-denormal float which when converted back to double gives us the same
9247   // double. This is to exploit the XXSPLTIDP instruction.
9248   if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9249       (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9250       convertToNonDenormSingle(APSplatBits)) {
9251     SDValue SplatNode = DAG.getNode(
9252         PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9253         DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9254     return DAG.getBitcast(Op.getValueType(), SplatNode);
9255   }
9256 
9257   if (!BVNIsConstantSplat || SplatBitSize > 32) {
9258 
9259     bool IsPermutedLoad = false;
9260     const SDValue *InputLoad =
9261         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9262     // Handle load-and-splat patterns as we have instructions that will do this
9263     // in one go.
9264     if (InputLoad && DAG.isSplatValue(Op, true)) {
9265       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9266 
9267       // We have handling for 4 and 8 byte elements.
9268       unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9269 
9270       // Checking for a single use of this load, we have to check for vector
9271       // width (128 bits) / ElementSize uses (since each operand of the
9272       // BUILD_VECTOR is a separate use of the value.
9273       unsigned NumUsesOfInputLD = 128 / ElementSize;
9274       for (SDValue BVInOp : Op->ops())
9275         if (BVInOp.isUndef())
9276           NumUsesOfInputLD--;
9277       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9278       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9279           ((Subtarget.hasVSX() && ElementSize == 64) ||
9280            (Subtarget.hasP9Vector() && ElementSize == 32))) {
9281         SDValue Ops[] = {
9282           LD->getChain(),    // Chain
9283           LD->getBasePtr(),  // Ptr
9284           DAG.getValueType(Op.getValueType()) // VT
9285         };
9286         SDValue LdSplt = DAG.getMemIntrinsicNode(
9287             PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
9288             Ops, LD->getMemoryVT(), LD->getMemOperand());
9289         // Replace all uses of the output chain of the original load with the
9290         // output chain of the new load.
9291         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9292                                       LdSplt.getValue(1));
9293         return LdSplt;
9294       }
9295     }
9296 
9297     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9298     // lowered to VSX instructions under certain conditions.
9299     // Without VSX, there is no pattern more efficient than expanding the node.
9300     if (Subtarget.hasVSX() &&
9301         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9302                                         Subtarget.hasP8Vector()))
9303       return Op;
9304     return SDValue();
9305   }
9306 
9307   uint64_t SplatBits = APSplatBits.getZExtValue();
9308   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9309   unsigned SplatSize = SplatBitSize / 8;
9310 
9311   // First, handle single instruction cases.
9312 
9313   // All zeros?
9314   if (SplatBits == 0) {
9315     // Canonicalize all zero vectors to be v4i32.
9316     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9317       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9318       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9319     }
9320     return Op;
9321   }
9322 
9323   // We have XXSPLTIW for constant splats four bytes wide.
9324   // Given vector length is a multiple of 4, 2-byte splats can be replaced
9325   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9326   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9327   // turned into a 4-byte splat of 0xABABABAB.
9328   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9329     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9330                                   Op.getValueType(), DAG, dl);
9331 
9332   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9333     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9334                                   dl);
9335 
9336   // We have XXSPLTIB for constant splats one byte wide.
9337   if (Subtarget.hasP9Vector() && SplatSize == 1)
9338     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9339                                   dl);
9340 
9341   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9342   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9343                     (32-SplatBitSize));
9344   if (SextVal >= -16 && SextVal <= 15)
9345     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9346                                   dl);
9347 
9348   // Two instruction sequences.
9349 
9350   // If this value is in the range [-32,30] and is even, use:
9351   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9352   // If this value is in the range [17,31] and is odd, use:
9353   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9354   // If this value is in the range [-31,-17] and is odd, use:
9355   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9356   // Note the last two are three-instruction sequences.
9357   if (SextVal >= -32 && SextVal <= 31) {
9358     // To avoid having these optimizations undone by constant folding,
9359     // we convert to a pseudo that will be expanded later into one of
9360     // the above forms.
9361     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9362     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9363               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9364     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9365     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9366     if (VT == Op.getValueType())
9367       return RetVal;
9368     else
9369       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9370   }
9371 
9372   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9373   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9374   // for fneg/fabs.
9375   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9376     // Make -1 and vspltisw -1:
9377     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9378 
9379     // Make the VSLW intrinsic, computing 0x8000_0000.
9380     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9381                                    OnesV, DAG, dl);
9382 
9383     // xor by OnesV to invert it.
9384     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9385     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9386   }
9387 
9388   // Check to see if this is a wide variety of vsplti*, binop self cases.
9389   static const signed char SplatCsts[] = {
9390     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9391     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9392   };
9393 
9394   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9395     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9396     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
9397     int i = SplatCsts[idx];
9398 
9399     // Figure out what shift amount will be used by altivec if shifted by i in
9400     // this splat size.
9401     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9402 
9403     // vsplti + shl self.
9404     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9405       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9406       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9407         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9408         Intrinsic::ppc_altivec_vslw
9409       };
9410       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9411       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9412     }
9413 
9414     // vsplti + srl self.
9415     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9416       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9417       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9418         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9419         Intrinsic::ppc_altivec_vsrw
9420       };
9421       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9422       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9423     }
9424 
9425     // vsplti + sra self.
9426     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9427       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9428       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9429         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9430         Intrinsic::ppc_altivec_vsraw
9431       };
9432       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9433       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9434     }
9435 
9436     // vsplti + rol self.
9437     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9438                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9439       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9440       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9441         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9442         Intrinsic::ppc_altivec_vrlw
9443       };
9444       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9445       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9446     }
9447 
9448     // t = vsplti c, result = vsldoi t, t, 1
9449     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9450       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9451       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9452       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9453     }
9454     // t = vsplti c, result = vsldoi t, t, 2
9455     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9456       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9457       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9458       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9459     }
9460     // t = vsplti c, result = vsldoi t, t, 3
9461     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9462       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9463       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9464       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9465     }
9466   }
9467 
9468   return SDValue();
9469 }
9470 
9471 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9472 /// the specified operations to build the shuffle.
9473 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9474                                       SDValue RHS, SelectionDAG &DAG,
9475                                       const SDLoc &dl) {
9476   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9477   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9478   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9479 
9480   enum {
9481     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9482     OP_VMRGHW,
9483     OP_VMRGLW,
9484     OP_VSPLTISW0,
9485     OP_VSPLTISW1,
9486     OP_VSPLTISW2,
9487     OP_VSPLTISW3,
9488     OP_VSLDOI4,
9489     OP_VSLDOI8,
9490     OP_VSLDOI12
9491   };
9492 
9493   if (OpNum == OP_COPY) {
9494     if (LHSID == (1*9+2)*9+3) return LHS;
9495     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9496     return RHS;
9497   }
9498 
9499   SDValue OpLHS, OpRHS;
9500   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9501   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9502 
9503   int ShufIdxs[16];
9504   switch (OpNum) {
9505   default: llvm_unreachable("Unknown i32 permute!");
9506   case OP_VMRGHW:
9507     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9508     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9509     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9510     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9511     break;
9512   case OP_VMRGLW:
9513     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9514     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9515     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9516     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9517     break;
9518   case OP_VSPLTISW0:
9519     for (unsigned i = 0; i != 16; ++i)
9520       ShufIdxs[i] = (i&3)+0;
9521     break;
9522   case OP_VSPLTISW1:
9523     for (unsigned i = 0; i != 16; ++i)
9524       ShufIdxs[i] = (i&3)+4;
9525     break;
9526   case OP_VSPLTISW2:
9527     for (unsigned i = 0; i != 16; ++i)
9528       ShufIdxs[i] = (i&3)+8;
9529     break;
9530   case OP_VSPLTISW3:
9531     for (unsigned i = 0; i != 16; ++i)
9532       ShufIdxs[i] = (i&3)+12;
9533     break;
9534   case OP_VSLDOI4:
9535     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9536   case OP_VSLDOI8:
9537     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9538   case OP_VSLDOI12:
9539     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9540   }
9541   EVT VT = OpLHS.getValueType();
9542   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9543   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9544   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9545   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9546 }
9547 
9548 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9549 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9550 /// SDValue.
9551 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9552                                            SelectionDAG &DAG) const {
9553   const unsigned BytesInVector = 16;
9554   bool IsLE = Subtarget.isLittleEndian();
9555   SDLoc dl(N);
9556   SDValue V1 = N->getOperand(0);
9557   SDValue V2 = N->getOperand(1);
9558   unsigned ShiftElts = 0, InsertAtByte = 0;
9559   bool Swap = false;
9560 
9561   // Shifts required to get the byte we want at element 7.
9562   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9563                                    0, 15, 14, 13, 12, 11, 10, 9};
9564   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9565                                 1, 2,  3,  4,  5,  6,  7,  8};
9566 
9567   ArrayRef<int> Mask = N->getMask();
9568   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9569 
9570   // For each mask element, find out if we're just inserting something
9571   // from V2 into V1 or vice versa.
9572   // Possible permutations inserting an element from V2 into V1:
9573   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9574   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9575   //   ...
9576   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9577   // Inserting from V1 into V2 will be similar, except mask range will be
9578   // [16,31].
9579 
9580   bool FoundCandidate = false;
9581   // If both vector operands for the shuffle are the same vector, the mask
9582   // will contain only elements from the first one and the second one will be
9583   // undef.
9584   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9585   // Go through the mask of half-words to find an element that's being moved
9586   // from one vector to the other.
9587   for (unsigned i = 0; i < BytesInVector; ++i) {
9588     unsigned CurrentElement = Mask[i];
9589     // If 2nd operand is undefined, we should only look for element 7 in the
9590     // Mask.
9591     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9592       continue;
9593 
9594     bool OtherElementsInOrder = true;
9595     // Examine the other elements in the Mask to see if they're in original
9596     // order.
9597     for (unsigned j = 0; j < BytesInVector; ++j) {
9598       if (j == i)
9599         continue;
9600       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
9601       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
9602       // in which we always assume we're always picking from the 1st operand.
9603       int MaskOffset =
9604           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9605       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9606         OtherElementsInOrder = false;
9607         break;
9608       }
9609     }
9610     // If other elements are in original order, we record the number of shifts
9611     // we need to get the element we want into element 7. Also record which byte
9612     // in the vector we should insert into.
9613     if (OtherElementsInOrder) {
9614       // If 2nd operand is undefined, we assume no shifts and no swapping.
9615       if (V2.isUndef()) {
9616         ShiftElts = 0;
9617         Swap = false;
9618       } else {
9619         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9620         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9621                          : BigEndianShifts[CurrentElement & 0xF];
9622         Swap = CurrentElement < BytesInVector;
9623       }
9624       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9625       FoundCandidate = true;
9626       break;
9627     }
9628   }
9629 
9630   if (!FoundCandidate)
9631     return SDValue();
9632 
9633   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9634   // optionally with VECSHL if shift is required.
9635   if (Swap)
9636     std::swap(V1, V2);
9637   if (V2.isUndef())
9638     V2 = V1;
9639   if (ShiftElts) {
9640     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9641                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9642     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9643                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9644   }
9645   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9646                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9647 }
9648 
9649 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9650 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9651 /// SDValue.
9652 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9653                                            SelectionDAG &DAG) const {
9654   const unsigned NumHalfWords = 8;
9655   const unsigned BytesInVector = NumHalfWords * 2;
9656   // Check that the shuffle is on half-words.
9657   if (!isNByteElemShuffleMask(N, 2, 1))
9658     return SDValue();
9659 
9660   bool IsLE = Subtarget.isLittleEndian();
9661   SDLoc dl(N);
9662   SDValue V1 = N->getOperand(0);
9663   SDValue V2 = N->getOperand(1);
9664   unsigned ShiftElts = 0, InsertAtByte = 0;
9665   bool Swap = false;
9666 
9667   // Shifts required to get the half-word we want at element 3.
9668   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9669   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9670 
9671   uint32_t Mask = 0;
9672   uint32_t OriginalOrderLow = 0x1234567;
9673   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9674   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9675   // 32-bit space, only need 4-bit nibbles per element.
9676   for (unsigned i = 0; i < NumHalfWords; ++i) {
9677     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9678     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9679   }
9680 
9681   // For each mask element, find out if we're just inserting something
9682   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9683   // from V2 into V1:
9684   //   X, 1, 2, 3, 4, 5, 6, 7
9685   //   0, X, 2, 3, 4, 5, 6, 7
9686   //   0, 1, X, 3, 4, 5, 6, 7
9687   //   0, 1, 2, X, 4, 5, 6, 7
9688   //   0, 1, 2, 3, X, 5, 6, 7
9689   //   0, 1, 2, 3, 4, X, 6, 7
9690   //   0, 1, 2, 3, 4, 5, X, 7
9691   //   0, 1, 2, 3, 4, 5, 6, X
9692   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9693 
9694   bool FoundCandidate = false;
9695   // Go through the mask of half-words to find an element that's being moved
9696   // from one vector to the other.
9697   for (unsigned i = 0; i < NumHalfWords; ++i) {
9698     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9699     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9700     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9701     uint32_t TargetOrder = 0x0;
9702 
9703     // If both vector operands for the shuffle are the same vector, the mask
9704     // will contain only elements from the first one and the second one will be
9705     // undef.
9706     if (V2.isUndef()) {
9707       ShiftElts = 0;
9708       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9709       TargetOrder = OriginalOrderLow;
9710       Swap = false;
9711       // Skip if not the correct element or mask of other elements don't equal
9712       // to our expected order.
9713       if (MaskOneElt == VINSERTHSrcElem &&
9714           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9715         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9716         FoundCandidate = true;
9717         break;
9718       }
9719     } else { // If both operands are defined.
9720       // Target order is [8,15] if the current mask is between [0,7].
9721       TargetOrder =
9722           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9723       // Skip if mask of other elements don't equal our expected order.
9724       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9725         // We only need the last 3 bits for the number of shifts.
9726         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9727                          : BigEndianShifts[MaskOneElt & 0x7];
9728         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9729         Swap = MaskOneElt < NumHalfWords;
9730         FoundCandidate = true;
9731         break;
9732       }
9733     }
9734   }
9735 
9736   if (!FoundCandidate)
9737     return SDValue();
9738 
9739   // Candidate found, construct the proper SDAG sequence with VINSERTH,
9740   // optionally with VECSHL if shift is required.
9741   if (Swap)
9742     std::swap(V1, V2);
9743   if (V2.isUndef())
9744     V2 = V1;
9745   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9746   if (ShiftElts) {
9747     // Double ShiftElts because we're left shifting on v16i8 type.
9748     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9749                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9750     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9751     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9752                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9753     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9754   }
9755   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9756   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9757                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
9758   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9759 }
9760 
9761 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9762 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9763 /// return the default SDValue.
9764 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9765                                               SelectionDAG &DAG) const {
9766   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9767   // to v16i8. Peek through the bitcasts to get the actual operands.
9768   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9769   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9770 
9771   auto ShuffleMask = SVN->getMask();
9772   SDValue VecShuffle(SVN, 0);
9773   SDLoc DL(SVN);
9774 
9775   // Check that we have a four byte shuffle.
9776   if (!isNByteElemShuffleMask(SVN, 4, 1))
9777     return SDValue();
9778 
9779   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9780   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9781     std::swap(LHS, RHS);
9782     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9783     ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9784   }
9785 
9786   // Ensure that the RHS is a vector of constants.
9787   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9788   if (!BVN)
9789     return SDValue();
9790 
9791   // Check if RHS is a splat of 4-bytes (or smaller).
9792   APInt APSplatValue, APSplatUndef;
9793   unsigned SplatBitSize;
9794   bool HasAnyUndefs;
9795   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9796                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9797       SplatBitSize > 32)
9798     return SDValue();
9799 
9800   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9801   // The instruction splats a constant C into two words of the source vector
9802   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9803   // Thus we check that the shuffle mask is the equivalent  of
9804   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9805   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9806   // within each word are consecutive, so we only need to check the first byte.
9807   SDValue Index;
9808   bool IsLE = Subtarget.isLittleEndian();
9809   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9810       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9811        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9812     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9813   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9814            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9815             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9816     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9817   else
9818     return SDValue();
9819 
9820   // If the splat is narrower than 32-bits, we need to get the 32-bit value
9821   // for XXSPLTI32DX.
9822   unsigned SplatVal = APSplatValue.getZExtValue();
9823   for (; SplatBitSize < 32; SplatBitSize <<= 1)
9824     SplatVal |= (SplatVal << SplatBitSize);
9825 
9826   SDValue SplatNode = DAG.getNode(
9827       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9828       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9829   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9830 }
9831 
9832 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9833 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9834 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9835 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9836 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9837   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9838   assert(Op.getValueType() == MVT::v1i128 &&
9839          "Only set v1i128 as custom, other type shouldn't reach here!");
9840   SDLoc dl(Op);
9841   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9842   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9843   unsigned SHLAmt = N1.getConstantOperandVal(0);
9844   if (SHLAmt % 8 == 0) {
9845     SmallVector<int, 16> Mask(16, 0);
9846     std::iota(Mask.begin(), Mask.end(), 0);
9847     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9848     if (SDValue Shuffle =
9849             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9850                                  DAG.getUNDEF(MVT::v16i8), Mask))
9851       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9852   }
9853   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9854   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9855                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9856   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9857                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9858   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9859   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9860 }
9861 
9862 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9863 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9864 /// return the code it can be lowered into.  Worst case, it can always be
9865 /// lowered into a vperm.
9866 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9867                                                SelectionDAG &DAG) const {
9868   SDLoc dl(Op);
9869   SDValue V1 = Op.getOperand(0);
9870   SDValue V2 = Op.getOperand(1);
9871   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9872 
9873   // Any nodes that were combined in the target-independent combiner prior
9874   // to vector legalization will not be sent to the target combine. Try to
9875   // combine it here.
9876   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9877     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9878       return NewShuffle;
9879     Op = NewShuffle;
9880     SVOp = cast<ShuffleVectorSDNode>(Op);
9881     V1 = Op.getOperand(0);
9882     V2 = Op.getOperand(1);
9883   }
9884   EVT VT = Op.getValueType();
9885   bool isLittleEndian = Subtarget.isLittleEndian();
9886 
9887   unsigned ShiftElts, InsertAtByte;
9888   bool Swap = false;
9889 
9890   // If this is a load-and-splat, we can do that with a single instruction
9891   // in some cases. However if the load has multiple uses, we don't want to
9892   // combine it because that will just produce multiple loads.
9893   bool IsPermutedLoad = false;
9894   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9895   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9896       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9897       InputLoad->hasOneUse()) {
9898     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9899     int SplatIdx =
9900       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9901 
9902     // The splat index for permuted loads will be in the left half of the vector
9903     // which is strictly wider than the loaded value by 8 bytes. So we need to
9904     // adjust the splat index to point to the correct address in memory.
9905     if (IsPermutedLoad) {
9906       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9907       SplatIdx += IsFourByte ? 2 : 1;
9908       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9909              "Splat of a value outside of the loaded memory");
9910     }
9911 
9912     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9913     // For 4-byte load-and-splat, we need Power9.
9914     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9915       uint64_t Offset = 0;
9916       if (IsFourByte)
9917         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9918       else
9919         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9920 
9921       SDValue BasePtr = LD->getBasePtr();
9922       if (Offset != 0)
9923         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9924                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9925       SDValue Ops[] = {
9926         LD->getChain(),    // Chain
9927         BasePtr,           // BasePtr
9928         DAG.getValueType(Op.getValueType()) // VT
9929       };
9930       SDVTList VTL =
9931         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9932       SDValue LdSplt =
9933         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9934                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
9935       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9936       if (LdSplt.getValueType() != SVOp->getValueType(0))
9937         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9938       return LdSplt;
9939     }
9940   }
9941   if (Subtarget.hasP9Vector() &&
9942       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9943                            isLittleEndian)) {
9944     if (Swap)
9945       std::swap(V1, V2);
9946     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9947     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9948     if (ShiftElts) {
9949       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9950                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
9951       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9952                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9953       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9954     }
9955     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9956                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9957     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9958   }
9959 
9960   if (Subtarget.hasPrefixInstrs()) {
9961     SDValue SplatInsertNode;
9962     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9963       return SplatInsertNode;
9964   }
9965 
9966   if (Subtarget.hasP9Altivec()) {
9967     SDValue NewISDNode;
9968     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9969       return NewISDNode;
9970 
9971     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9972       return NewISDNode;
9973   }
9974 
9975   if (Subtarget.hasVSX() &&
9976       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9977     if (Swap)
9978       std::swap(V1, V2);
9979     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9980     SDValue Conv2 =
9981         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9982 
9983     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9984                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9985     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9986   }
9987 
9988   if (Subtarget.hasVSX() &&
9989     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9990     if (Swap)
9991       std::swap(V1, V2);
9992     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9993     SDValue Conv2 =
9994         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9995 
9996     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9997                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9998     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9999   }
10000 
10001   if (Subtarget.hasP9Vector()) {
10002      if (PPC::isXXBRHShuffleMask(SVOp)) {
10003       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10004       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10005       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10006     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10007       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10008       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10009       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10010     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10011       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10012       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10013       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10014     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10015       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10016       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10017       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10018     }
10019   }
10020 
10021   if (Subtarget.hasVSX()) {
10022     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10023       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10024 
10025       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10026       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10027                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
10028       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10029     }
10030 
10031     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10032     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10033       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10034       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10035       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10036     }
10037   }
10038 
10039   // Cases that are handled by instructions that take permute immediates
10040   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10041   // selected by the instruction selector.
10042   if (V2.isUndef()) {
10043     if (PPC::isSplatShuffleMask(SVOp, 1) ||
10044         PPC::isSplatShuffleMask(SVOp, 2) ||
10045         PPC::isSplatShuffleMask(SVOp, 4) ||
10046         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10047         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10048         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10049         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10050         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10051         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10052         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10053         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10054         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10055         (Subtarget.hasP8Altivec() && (
10056          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10057          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10058          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10059       return Op;
10060     }
10061   }
10062 
10063   // Altivec has a variety of "shuffle immediates" that take two vector inputs
10064   // and produce a fixed permutation.  If any of these match, do not lower to
10065   // VPERM.
10066   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10067   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10068       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10069       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10070       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10071       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10072       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10073       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10074       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10075       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10076       (Subtarget.hasP8Altivec() && (
10077        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10078        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10079        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10080     return Op;
10081 
10082   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10083   // perfect shuffle table to emit an optimal matching sequence.
10084   ArrayRef<int> PermMask = SVOp->getMask();
10085 
10086   unsigned PFIndexes[4];
10087   bool isFourElementShuffle = true;
10088   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10089     unsigned EltNo = 8;   // Start out undef.
10090     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
10091       if (PermMask[i*4+j] < 0)
10092         continue;   // Undef, ignore it.
10093 
10094       unsigned ByteSource = PermMask[i*4+j];
10095       if ((ByteSource & 3) != j) {
10096         isFourElementShuffle = false;
10097         break;
10098       }
10099 
10100       if (EltNo == 8) {
10101         EltNo = ByteSource/4;
10102       } else if (EltNo != ByteSource/4) {
10103         isFourElementShuffle = false;
10104         break;
10105       }
10106     }
10107     PFIndexes[i] = EltNo;
10108   }
10109 
10110   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10111   // perfect shuffle vector to determine if it is cost effective to do this as
10112   // discrete instructions, or whether we should use a vperm.
10113   // For now, we skip this for little endian until such time as we have a
10114   // little-endian perfect shuffle table.
10115   if (isFourElementShuffle && !isLittleEndian) {
10116     // Compute the index in the perfect shuffle table.
10117     unsigned PFTableIndex =
10118       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10119 
10120     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10121     unsigned Cost  = (PFEntry >> 30);
10122 
10123     // Determining when to avoid vperm is tricky.  Many things affect the cost
10124     // of vperm, particularly how many times the perm mask needs to be computed.
10125     // For example, if the perm mask can be hoisted out of a loop or is already
10126     // used (perhaps because there are multiple permutes with the same shuffle
10127     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
10128     // the loop requires an extra register.
10129     //
10130     // As a compromise, we only emit discrete instructions if the shuffle can be
10131     // generated in 3 or fewer operations.  When we have loop information
10132     // available, if this block is within a loop, we should avoid using vperm
10133     // for 3-operation perms and use a constant pool load instead.
10134     if (Cost < 3)
10135       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10136   }
10137 
10138   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10139   // vector that will get spilled to the constant pool.
10140   if (V2.isUndef()) V2 = V1;
10141 
10142   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10143   // that it is in input element units, not in bytes.  Convert now.
10144 
10145   // For little endian, the order of the input vectors is reversed, and
10146   // the permutation mask is complemented with respect to 31.  This is
10147   // necessary to produce proper semantics with the big-endian-biased vperm
10148   // instruction.
10149   EVT EltVT = V1.getValueType().getVectorElementType();
10150   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10151 
10152   SmallVector<SDValue, 16> ResultMask;
10153   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10154     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10155 
10156     for (unsigned j = 0; j != BytesPerElement; ++j)
10157       if (isLittleEndian)
10158         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10159                                              dl, MVT::i32));
10160       else
10161         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10162                                              MVT::i32));
10163   }
10164 
10165   ShufflesHandledWithVPERM++;
10166   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10167   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10168   LLVM_DEBUG(SVOp->dump());
10169   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10170   LLVM_DEBUG(VPermMask.dump());
10171 
10172   if (isLittleEndian)
10173     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10174                        V2, V1, VPermMask);
10175   else
10176     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10177                        V1, V2, VPermMask);
10178 }
10179 
10180 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10181 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10182 /// information about the intrinsic.
10183 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10184                                  bool &isDot, const PPCSubtarget &Subtarget) {
10185   unsigned IntrinsicID =
10186       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10187   CompareOpc = -1;
10188   isDot = false;
10189   switch (IntrinsicID) {
10190   default:
10191     return false;
10192   // Comparison predicates.
10193   case Intrinsic::ppc_altivec_vcmpbfp_p:
10194     CompareOpc = 966;
10195     isDot = true;
10196     break;
10197   case Intrinsic::ppc_altivec_vcmpeqfp_p:
10198     CompareOpc = 198;
10199     isDot = true;
10200     break;
10201   case Intrinsic::ppc_altivec_vcmpequb_p:
10202     CompareOpc = 6;
10203     isDot = true;
10204     break;
10205   case Intrinsic::ppc_altivec_vcmpequh_p:
10206     CompareOpc = 70;
10207     isDot = true;
10208     break;
10209   case Intrinsic::ppc_altivec_vcmpequw_p:
10210     CompareOpc = 134;
10211     isDot = true;
10212     break;
10213   case Intrinsic::ppc_altivec_vcmpequd_p:
10214     if (Subtarget.hasP8Altivec()) {
10215       CompareOpc = 199;
10216       isDot = true;
10217     } else
10218       return false;
10219     break;
10220   case Intrinsic::ppc_altivec_vcmpneb_p:
10221   case Intrinsic::ppc_altivec_vcmpneh_p:
10222   case Intrinsic::ppc_altivec_vcmpnew_p:
10223   case Intrinsic::ppc_altivec_vcmpnezb_p:
10224   case Intrinsic::ppc_altivec_vcmpnezh_p:
10225   case Intrinsic::ppc_altivec_vcmpnezw_p:
10226     if (Subtarget.hasP9Altivec()) {
10227       switch (IntrinsicID) {
10228       default:
10229         llvm_unreachable("Unknown comparison intrinsic.");
10230       case Intrinsic::ppc_altivec_vcmpneb_p:
10231         CompareOpc = 7;
10232         break;
10233       case Intrinsic::ppc_altivec_vcmpneh_p:
10234         CompareOpc = 71;
10235         break;
10236       case Intrinsic::ppc_altivec_vcmpnew_p:
10237         CompareOpc = 135;
10238         break;
10239       case Intrinsic::ppc_altivec_vcmpnezb_p:
10240         CompareOpc = 263;
10241         break;
10242       case Intrinsic::ppc_altivec_vcmpnezh_p:
10243         CompareOpc = 327;
10244         break;
10245       case Intrinsic::ppc_altivec_vcmpnezw_p:
10246         CompareOpc = 391;
10247         break;
10248       }
10249       isDot = true;
10250     } else
10251       return false;
10252     break;
10253   case Intrinsic::ppc_altivec_vcmpgefp_p:
10254     CompareOpc = 454;
10255     isDot = true;
10256     break;
10257   case Intrinsic::ppc_altivec_vcmpgtfp_p:
10258     CompareOpc = 710;
10259     isDot = true;
10260     break;
10261   case Intrinsic::ppc_altivec_vcmpgtsb_p:
10262     CompareOpc = 774;
10263     isDot = true;
10264     break;
10265   case Intrinsic::ppc_altivec_vcmpgtsh_p:
10266     CompareOpc = 838;
10267     isDot = true;
10268     break;
10269   case Intrinsic::ppc_altivec_vcmpgtsw_p:
10270     CompareOpc = 902;
10271     isDot = true;
10272     break;
10273   case Intrinsic::ppc_altivec_vcmpgtsd_p:
10274     if (Subtarget.hasP8Altivec()) {
10275       CompareOpc = 967;
10276       isDot = true;
10277     } else
10278       return false;
10279     break;
10280   case Intrinsic::ppc_altivec_vcmpgtub_p:
10281     CompareOpc = 518;
10282     isDot = true;
10283     break;
10284   case Intrinsic::ppc_altivec_vcmpgtuh_p:
10285     CompareOpc = 582;
10286     isDot = true;
10287     break;
10288   case Intrinsic::ppc_altivec_vcmpgtuw_p:
10289     CompareOpc = 646;
10290     isDot = true;
10291     break;
10292   case Intrinsic::ppc_altivec_vcmpgtud_p:
10293     if (Subtarget.hasP8Altivec()) {
10294       CompareOpc = 711;
10295       isDot = true;
10296     } else
10297       return false;
10298     break;
10299 
10300   case Intrinsic::ppc_altivec_vcmpequq:
10301   case Intrinsic::ppc_altivec_vcmpgtsq:
10302   case Intrinsic::ppc_altivec_vcmpgtuq:
10303     if (!Subtarget.isISA3_1())
10304       return false;
10305     switch (IntrinsicID) {
10306     default:
10307       llvm_unreachable("Unknown comparison intrinsic.");
10308     case Intrinsic::ppc_altivec_vcmpequq:
10309       CompareOpc = 455;
10310       break;
10311     case Intrinsic::ppc_altivec_vcmpgtsq:
10312       CompareOpc = 903;
10313       break;
10314     case Intrinsic::ppc_altivec_vcmpgtuq:
10315       CompareOpc = 647;
10316       break;
10317     }
10318     break;
10319 
10320   // VSX predicate comparisons use the same infrastructure
10321   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10322   case Intrinsic::ppc_vsx_xvcmpgedp_p:
10323   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10324   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10325   case Intrinsic::ppc_vsx_xvcmpgesp_p:
10326   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10327     if (Subtarget.hasVSX()) {
10328       switch (IntrinsicID) {
10329       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10330         CompareOpc = 99;
10331         break;
10332       case Intrinsic::ppc_vsx_xvcmpgedp_p:
10333         CompareOpc = 115;
10334         break;
10335       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10336         CompareOpc = 107;
10337         break;
10338       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10339         CompareOpc = 67;
10340         break;
10341       case Intrinsic::ppc_vsx_xvcmpgesp_p:
10342         CompareOpc = 83;
10343         break;
10344       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10345         CompareOpc = 75;
10346         break;
10347       }
10348       isDot = true;
10349     } else
10350       return false;
10351     break;
10352 
10353   // Normal Comparisons.
10354   case Intrinsic::ppc_altivec_vcmpbfp:
10355     CompareOpc = 966;
10356     break;
10357   case Intrinsic::ppc_altivec_vcmpeqfp:
10358     CompareOpc = 198;
10359     break;
10360   case Intrinsic::ppc_altivec_vcmpequb:
10361     CompareOpc = 6;
10362     break;
10363   case Intrinsic::ppc_altivec_vcmpequh:
10364     CompareOpc = 70;
10365     break;
10366   case Intrinsic::ppc_altivec_vcmpequw:
10367     CompareOpc = 134;
10368     break;
10369   case Intrinsic::ppc_altivec_vcmpequd:
10370     if (Subtarget.hasP8Altivec())
10371       CompareOpc = 199;
10372     else
10373       return false;
10374     break;
10375   case Intrinsic::ppc_altivec_vcmpneb:
10376   case Intrinsic::ppc_altivec_vcmpneh:
10377   case Intrinsic::ppc_altivec_vcmpnew:
10378   case Intrinsic::ppc_altivec_vcmpnezb:
10379   case Intrinsic::ppc_altivec_vcmpnezh:
10380   case Intrinsic::ppc_altivec_vcmpnezw:
10381     if (Subtarget.hasP9Altivec())
10382       switch (IntrinsicID) {
10383       default:
10384         llvm_unreachable("Unknown comparison intrinsic.");
10385       case Intrinsic::ppc_altivec_vcmpneb:
10386         CompareOpc = 7;
10387         break;
10388       case Intrinsic::ppc_altivec_vcmpneh:
10389         CompareOpc = 71;
10390         break;
10391       case Intrinsic::ppc_altivec_vcmpnew:
10392         CompareOpc = 135;
10393         break;
10394       case Intrinsic::ppc_altivec_vcmpnezb:
10395         CompareOpc = 263;
10396         break;
10397       case Intrinsic::ppc_altivec_vcmpnezh:
10398         CompareOpc = 327;
10399         break;
10400       case Intrinsic::ppc_altivec_vcmpnezw:
10401         CompareOpc = 391;
10402         break;
10403       }
10404     else
10405       return false;
10406     break;
10407   case Intrinsic::ppc_altivec_vcmpgefp:
10408     CompareOpc = 454;
10409     break;
10410   case Intrinsic::ppc_altivec_vcmpgtfp:
10411     CompareOpc = 710;
10412     break;
10413   case Intrinsic::ppc_altivec_vcmpgtsb:
10414     CompareOpc = 774;
10415     break;
10416   case Intrinsic::ppc_altivec_vcmpgtsh:
10417     CompareOpc = 838;
10418     break;
10419   case Intrinsic::ppc_altivec_vcmpgtsw:
10420     CompareOpc = 902;
10421     break;
10422   case Intrinsic::ppc_altivec_vcmpgtsd:
10423     if (Subtarget.hasP8Altivec())
10424       CompareOpc = 967;
10425     else
10426       return false;
10427     break;
10428   case Intrinsic::ppc_altivec_vcmpgtub:
10429     CompareOpc = 518;
10430     break;
10431   case Intrinsic::ppc_altivec_vcmpgtuh:
10432     CompareOpc = 582;
10433     break;
10434   case Intrinsic::ppc_altivec_vcmpgtuw:
10435     CompareOpc = 646;
10436     break;
10437   case Intrinsic::ppc_altivec_vcmpgtud:
10438     if (Subtarget.hasP8Altivec())
10439       CompareOpc = 711;
10440     else
10441       return false;
10442     break;
10443   case Intrinsic::ppc_altivec_vcmpequq_p:
10444   case Intrinsic::ppc_altivec_vcmpgtsq_p:
10445   case Intrinsic::ppc_altivec_vcmpgtuq_p:
10446     if (!Subtarget.isISA3_1())
10447       return false;
10448     switch (IntrinsicID) {
10449     default:
10450       llvm_unreachable("Unknown comparison intrinsic.");
10451     case Intrinsic::ppc_altivec_vcmpequq_p:
10452       CompareOpc = 455;
10453       break;
10454     case Intrinsic::ppc_altivec_vcmpgtsq_p:
10455       CompareOpc = 903;
10456       break;
10457     case Intrinsic::ppc_altivec_vcmpgtuq_p:
10458       CompareOpc = 647;
10459       break;
10460     }
10461     isDot = true;
10462     break;
10463   }
10464   return true;
10465 }
10466 
10467 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10468 /// lower, do it, otherwise return null.
10469 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10470                                                    SelectionDAG &DAG) const {
10471   unsigned IntrinsicID =
10472     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10473 
10474   SDLoc dl(Op);
10475 
10476   switch (IntrinsicID) {
10477   case Intrinsic::thread_pointer:
10478     // Reads the thread pointer register, used for __builtin_thread_pointer.
10479     if (Subtarget.isPPC64())
10480       return DAG.getRegister(PPC::X13, MVT::i64);
10481     return DAG.getRegister(PPC::R2, MVT::i32);
10482 
10483   case Intrinsic::ppc_mma_disassemble_acc:
10484   case Intrinsic::ppc_mma_disassemble_pair: {
10485     int NumVecs = 2;
10486     SDValue WideVec = Op.getOperand(1);
10487     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10488       NumVecs = 4;
10489       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10490     }
10491     SmallVector<SDValue, 4> RetOps;
10492     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10493       SDValue Extract = DAG.getNode(
10494           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10495           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10496                                                      : VecNo,
10497                           dl, MVT::i64));
10498       RetOps.push_back(Extract);
10499     }
10500     return DAG.getMergeValues(RetOps, dl);
10501   }
10502   }
10503 
10504   // If this is a lowered altivec predicate compare, CompareOpc is set to the
10505   // opcode number of the comparison.
10506   int CompareOpc;
10507   bool isDot;
10508   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10509     return SDValue();    // Don't custom lower most intrinsics.
10510 
10511   // If this is a non-dot comparison, make the VCMP node and we are done.
10512   if (!isDot) {
10513     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10514                               Op.getOperand(1), Op.getOperand(2),
10515                               DAG.getConstant(CompareOpc, dl, MVT::i32));
10516     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10517   }
10518 
10519   // Create the PPCISD altivec 'dot' comparison node.
10520   SDValue Ops[] = {
10521     Op.getOperand(2),  // LHS
10522     Op.getOperand(3),  // RHS
10523     DAG.getConstant(CompareOpc, dl, MVT::i32)
10524   };
10525   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10526   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10527 
10528   // Now that we have the comparison, emit a copy from the CR to a GPR.
10529   // This is flagged to the above dot comparison.
10530   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10531                                 DAG.getRegister(PPC::CR6, MVT::i32),
10532                                 CompNode.getValue(1));
10533 
10534   // Unpack the result based on how the target uses it.
10535   unsigned BitNo;   // Bit # of CR6.
10536   bool InvertBit;   // Invert result?
10537   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10538   default:  // Can't happen, don't crash on invalid number though.
10539   case 0:   // Return the value of the EQ bit of CR6.
10540     BitNo = 0; InvertBit = false;
10541     break;
10542   case 1:   // Return the inverted value of the EQ bit of CR6.
10543     BitNo = 0; InvertBit = true;
10544     break;
10545   case 2:   // Return the value of the LT bit of CR6.
10546     BitNo = 2; InvertBit = false;
10547     break;
10548   case 3:   // Return the inverted value of the LT bit of CR6.
10549     BitNo = 2; InvertBit = true;
10550     break;
10551   }
10552 
10553   // Shift the bit into the low position.
10554   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10555                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10556   // Isolate the bit.
10557   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10558                       DAG.getConstant(1, dl, MVT::i32));
10559 
10560   // If we are supposed to, toggle the bit.
10561   if (InvertBit)
10562     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10563                         DAG.getConstant(1, dl, MVT::i32));
10564   return Flags;
10565 }
10566 
10567 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10568                                                SelectionDAG &DAG) const {
10569   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10570   // the beginning of the argument list.
10571   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10572   SDLoc DL(Op);
10573   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10574   case Intrinsic::ppc_cfence: {
10575     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10576     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10577     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10578                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10579                                                   Op.getOperand(ArgStart + 1)),
10580                                       Op.getOperand(0)),
10581                    0);
10582   }
10583   default:
10584     break;
10585   }
10586   return SDValue();
10587 }
10588 
10589 // Lower scalar BSWAP64 to xxbrd.
10590 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10591   SDLoc dl(Op);
10592   // MTVSRDD
10593   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10594                    Op.getOperand(0));
10595   // XXBRD
10596   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10597   // MFVSRD
10598   int VectorIndex = 0;
10599   if (Subtarget.isLittleEndian())
10600     VectorIndex = 1;
10601   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10602                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10603   return Op;
10604 }
10605 
10606 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10607 // compared to a value that is atomically loaded (atomic loads zero-extend).
10608 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10609                                                 SelectionDAG &DAG) const {
10610   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10611          "Expecting an atomic compare-and-swap here.");
10612   SDLoc dl(Op);
10613   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10614   EVT MemVT = AtomicNode->getMemoryVT();
10615   if (MemVT.getSizeInBits() >= 32)
10616     return Op;
10617 
10618   SDValue CmpOp = Op.getOperand(2);
10619   // If this is already correctly zero-extended, leave it alone.
10620   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10621   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10622     return Op;
10623 
10624   // Clear the high bits of the compare operand.
10625   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10626   SDValue NewCmpOp =
10627     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10628                 DAG.getConstant(MaskVal, dl, MVT::i32));
10629 
10630   // Replace the existing compare operand with the properly zero-extended one.
10631   SmallVector<SDValue, 4> Ops;
10632   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10633     Ops.push_back(AtomicNode->getOperand(i));
10634   Ops[2] = NewCmpOp;
10635   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10636   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10637   auto NodeTy =
10638     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10639   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10640 }
10641 
10642 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10643                                                  SelectionDAG &DAG) const {
10644   SDLoc dl(Op);
10645   // Create a stack slot that is 16-byte aligned.
10646   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10647   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10648   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10649   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10650 
10651   // Store the input value into Value#0 of the stack slot.
10652   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10653                                MachinePointerInfo());
10654   // Load it out.
10655   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10656 }
10657 
10658 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10659                                                   SelectionDAG &DAG) const {
10660   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10661          "Should only be called for ISD::INSERT_VECTOR_ELT");
10662 
10663   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10664   // We have legal lowering for constant indices but not for variable ones.
10665   if (!C)
10666     return SDValue();
10667 
10668   EVT VT = Op.getValueType();
10669   SDLoc dl(Op);
10670   SDValue V1 = Op.getOperand(0);
10671   SDValue V2 = Op.getOperand(1);
10672   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10673   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10674     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10675     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10676     unsigned InsertAtElement = C->getZExtValue();
10677     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10678     if (Subtarget.isLittleEndian()) {
10679       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10680     }
10681     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10682                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10683   }
10684   return Op;
10685 }
10686 
10687 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10688                                            SelectionDAG &DAG) const {
10689   SDLoc dl(Op);
10690   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10691   SDValue LoadChain = LN->getChain();
10692   SDValue BasePtr = LN->getBasePtr();
10693   EVT VT = Op.getValueType();
10694 
10695   if (VT != MVT::v256i1 && VT != MVT::v512i1)
10696     return Op;
10697 
10698   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10699   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10700   // 2 or 4 vsx registers.
10701   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10702          "Type unsupported without MMA");
10703   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10704          "Type unsupported without paired vector support");
10705   Align Alignment = LN->getAlign();
10706   SmallVector<SDValue, 4> Loads;
10707   SmallVector<SDValue, 4> LoadChains;
10708   unsigned NumVecs = VT.getSizeInBits() / 128;
10709   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10710     SDValue Load =
10711         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10712                     LN->getPointerInfo().getWithOffset(Idx * 16),
10713                     commonAlignment(Alignment, Idx * 16),
10714                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
10715     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10716                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10717     Loads.push_back(Load);
10718     LoadChains.push_back(Load.getValue(1));
10719   }
10720   if (Subtarget.isLittleEndian()) {
10721     std::reverse(Loads.begin(), Loads.end());
10722     std::reverse(LoadChains.begin(), LoadChains.end());
10723   }
10724   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10725   SDValue Value =
10726       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10727                   dl, VT, Loads);
10728   SDValue RetOps[] = {Value, TF};
10729   return DAG.getMergeValues(RetOps, dl);
10730 }
10731 
10732 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10733                                             SelectionDAG &DAG) const {
10734   SDLoc dl(Op);
10735   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10736   SDValue StoreChain = SN->getChain();
10737   SDValue BasePtr = SN->getBasePtr();
10738   SDValue Value = SN->getValue();
10739   EVT StoreVT = Value.getValueType();
10740 
10741   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10742     return Op;
10743 
10744   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10745   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10746   // underlying registers individually.
10747   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10748          "Type unsupported without MMA");
10749   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10750          "Type unsupported without paired vector support");
10751   Align Alignment = SN->getAlign();
10752   SmallVector<SDValue, 4> Stores;
10753   unsigned NumVecs = 2;
10754   if (StoreVT == MVT::v512i1) {
10755     Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10756     NumVecs = 4;
10757   }
10758   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10759     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10760     SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10761                               DAG.getConstant(VecNum, dl, MVT::i64));
10762     SDValue Store =
10763         DAG.getStore(StoreChain, dl, Elt, BasePtr,
10764                      SN->getPointerInfo().getWithOffset(Idx * 16),
10765                      commonAlignment(Alignment, Idx * 16),
10766                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
10767     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10768                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10769     Stores.push_back(Store);
10770   }
10771   SDValue TF = DAG.getTokenFactor(dl, Stores);
10772   return TF;
10773 }
10774 
10775 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10776   SDLoc dl(Op);
10777   if (Op.getValueType() == MVT::v4i32) {
10778     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10779 
10780     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10781     // +16 as shift amt.
10782     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10783     SDValue RHSSwap =   // = vrlw RHS, 16
10784       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10785 
10786     // Shrinkify inputs to v8i16.
10787     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10788     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10789     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10790 
10791     // Low parts multiplied together, generating 32-bit results (we ignore the
10792     // top parts).
10793     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10794                                         LHS, RHS, DAG, dl, MVT::v4i32);
10795 
10796     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10797                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10798     // Shift the high parts up 16 bits.
10799     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10800                               Neg16, DAG, dl);
10801     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10802   } else if (Op.getValueType() == MVT::v16i8) {
10803     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10804     bool isLittleEndian = Subtarget.isLittleEndian();
10805 
10806     // Multiply the even 8-bit parts, producing 16-bit sums.
10807     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10808                                            LHS, RHS, DAG, dl, MVT::v8i16);
10809     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10810 
10811     // Multiply the odd 8-bit parts, producing 16-bit sums.
10812     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10813                                           LHS, RHS, DAG, dl, MVT::v8i16);
10814     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10815 
10816     // Merge the results together.  Because vmuleub and vmuloub are
10817     // instructions with a big-endian bias, we must reverse the
10818     // element numbering and reverse the meaning of "odd" and "even"
10819     // when generating little endian code.
10820     int Ops[16];
10821     for (unsigned i = 0; i != 8; ++i) {
10822       if (isLittleEndian) {
10823         Ops[i*2  ] = 2*i;
10824         Ops[i*2+1] = 2*i+16;
10825       } else {
10826         Ops[i*2  ] = 2*i+1;
10827         Ops[i*2+1] = 2*i+1+16;
10828       }
10829     }
10830     if (isLittleEndian)
10831       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10832     else
10833       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10834   } else {
10835     llvm_unreachable("Unknown mul to lower!");
10836   }
10837 }
10838 
10839 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10840 
10841   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10842 
10843   EVT VT = Op.getValueType();
10844   assert(VT.isVector() &&
10845          "Only set vector abs as custom, scalar abs shouldn't reach here!");
10846   assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10847           VT == MVT::v16i8) &&
10848          "Unexpected vector element type!");
10849   assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10850          "Current subtarget doesn't support smax v2i64!");
10851 
10852   // For vector abs, it can be lowered to:
10853   // abs x
10854   // ==>
10855   // y = -x
10856   // smax(x, y)
10857 
10858   SDLoc dl(Op);
10859   SDValue X = Op.getOperand(0);
10860   SDValue Zero = DAG.getConstant(0, dl, VT);
10861   SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10862 
10863   // SMAX patch https://reviews.llvm.org/D47332
10864   // hasn't landed yet, so use intrinsic first here.
10865   // TODO: Should use SMAX directly once SMAX patch landed
10866   Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10867   if (VT == MVT::v2i64)
10868     BifID = Intrinsic::ppc_altivec_vmaxsd;
10869   else if (VT == MVT::v8i16)
10870     BifID = Intrinsic::ppc_altivec_vmaxsh;
10871   else if (VT == MVT::v16i8)
10872     BifID = Intrinsic::ppc_altivec_vmaxsb;
10873 
10874   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10875 }
10876 
10877 // Custom lowering for fpext vf32 to v2f64
10878 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10879 
10880   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10881          "Should only be called for ISD::FP_EXTEND");
10882 
10883   // FIXME: handle extends from half precision float vectors on P9.
10884   // We only want to custom lower an extend from v2f32 to v2f64.
10885   if (Op.getValueType() != MVT::v2f64 ||
10886       Op.getOperand(0).getValueType() != MVT::v2f32)
10887     return SDValue();
10888 
10889   SDLoc dl(Op);
10890   SDValue Op0 = Op.getOperand(0);
10891 
10892   switch (Op0.getOpcode()) {
10893   default:
10894     return SDValue();
10895   case ISD::EXTRACT_SUBVECTOR: {
10896     assert(Op0.getNumOperands() == 2 &&
10897            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10898            "Node should have 2 operands with second one being a constant!");
10899 
10900     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10901       return SDValue();
10902 
10903     // Custom lower is only done for high or low doubleword.
10904     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10905     if (Idx % 2 != 0)
10906       return SDValue();
10907 
10908     // Since input is v4f32, at this point Idx is either 0 or 2.
10909     // Shift to get the doubleword position we want.
10910     int DWord = Idx >> 1;
10911 
10912     // High and low word positions are different on little endian.
10913     if (Subtarget.isLittleEndian())
10914       DWord ^= 0x1;
10915 
10916     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10917                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10918   }
10919   case ISD::FADD:
10920   case ISD::FMUL:
10921   case ISD::FSUB: {
10922     SDValue NewLoad[2];
10923     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10924       // Ensure both input are loads.
10925       SDValue LdOp = Op0.getOperand(i);
10926       if (LdOp.getOpcode() != ISD::LOAD)
10927         return SDValue();
10928       // Generate new load node.
10929       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10930       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10931       NewLoad[i] = DAG.getMemIntrinsicNode(
10932           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10933           LD->getMemoryVT(), LD->getMemOperand());
10934     }
10935     SDValue NewOp =
10936         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10937                     NewLoad[1], Op0.getNode()->getFlags());
10938     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10939                        DAG.getConstant(0, dl, MVT::i32));
10940   }
10941   case ISD::LOAD: {
10942     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10943     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10944     SDValue NewLd = DAG.getMemIntrinsicNode(
10945         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10946         LD->getMemoryVT(), LD->getMemOperand());
10947     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10948                        DAG.getConstant(0, dl, MVT::i32));
10949   }
10950   }
10951   llvm_unreachable("ERROR:Should return for all cases within swtich.");
10952 }
10953 
10954 /// LowerOperation - Provide custom lowering hooks for some operations.
10955 ///
10956 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10957   switch (Op.getOpcode()) {
10958   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10959   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10960   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10961   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10962   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10963   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10964   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10965   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10966   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10967 
10968   // Variable argument lowering.
10969   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10970   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10971   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10972 
10973   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
10974   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10975   case ISD::GET_DYNAMIC_AREA_OFFSET:
10976     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10977 
10978   // Exception handling lowering.
10979   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
10980   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
10981   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
10982 
10983   case ISD::LOAD:               return LowerLOAD(Op, DAG);
10984   case ISD::STORE:              return LowerSTORE(Op, DAG);
10985   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
10986   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
10987   case ISD::STRICT_FP_TO_UINT:
10988   case ISD::STRICT_FP_TO_SINT:
10989   case ISD::FP_TO_UINT:
10990   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10991   case ISD::STRICT_UINT_TO_FP:
10992   case ISD::STRICT_SINT_TO_FP:
10993   case ISD::UINT_TO_FP:
10994   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
10995   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
10996 
10997   // Lower 64-bit shifts.
10998   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
10999   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
11000   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
11001 
11002   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
11003   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
11004 
11005   // Vector-related lowering.
11006   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
11007   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
11008   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11009   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
11010   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
11011   case ISD::MUL:                return LowerMUL(Op, DAG);
11012   case ISD::ABS:                return LowerABS(Op, DAG);
11013   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11014   case ISD::ROTL:               return LowerROTL(Op, DAG);
11015 
11016   // For counter-based loop handling.
11017   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
11018 
11019   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
11020 
11021   // Frame & Return address.
11022   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
11023   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
11024 
11025   case ISD::INTRINSIC_VOID:
11026     return LowerINTRINSIC_VOID(Op, DAG);
11027   case ISD::BSWAP:
11028     return LowerBSWAP(Op, DAG);
11029   case ISD::ATOMIC_CMP_SWAP:
11030     return LowerATOMIC_CMP_SWAP(Op, DAG);
11031   }
11032 }
11033 
11034 void PPCTargetLowering::LowerOperationWrapper(SDNode *N,
11035                                               SmallVectorImpl<SDValue> &Results,
11036                                               SelectionDAG &DAG) const {
11037   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
11038 
11039   if (!Res.getNode())
11040     return;
11041 
11042   // Take the return value as-is if original node has only one result.
11043   if (N->getNumValues() == 1) {
11044     Results.push_back(Res);
11045     return;
11046   }
11047 
11048   // New node should have the same number of results.
11049   assert((N->getNumValues() == Res->getNumValues()) &&
11050       "Lowering returned the wrong number of results!");
11051 
11052   for (unsigned i = 0; i < N->getNumValues(); ++i)
11053     Results.push_back(Res.getValue(i));
11054 }
11055 
11056 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11057                                            SmallVectorImpl<SDValue>&Results,
11058                                            SelectionDAG &DAG) const {
11059   SDLoc dl(N);
11060   switch (N->getOpcode()) {
11061   default:
11062     llvm_unreachable("Do not know how to custom type legalize this operation!");
11063   case ISD::READCYCLECOUNTER: {
11064     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11065     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11066 
11067     Results.push_back(
11068         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11069     Results.push_back(RTB.getValue(2));
11070     break;
11071   }
11072   case ISD::INTRINSIC_W_CHAIN: {
11073     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11074         Intrinsic::loop_decrement)
11075       break;
11076 
11077     assert(N->getValueType(0) == MVT::i1 &&
11078            "Unexpected result type for CTR decrement intrinsic");
11079     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11080                                  N->getValueType(0));
11081     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11082     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11083                                  N->getOperand(1));
11084 
11085     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11086     Results.push_back(NewInt.getValue(1));
11087     break;
11088   }
11089   case ISD::VAARG: {
11090     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11091       return;
11092 
11093     EVT VT = N->getValueType(0);
11094 
11095     if (VT == MVT::i64) {
11096       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11097 
11098       Results.push_back(NewNode);
11099       Results.push_back(NewNode.getValue(1));
11100     }
11101     return;
11102   }
11103   case ISD::STRICT_FP_TO_SINT:
11104   case ISD::STRICT_FP_TO_UINT:
11105   case ISD::FP_TO_SINT:
11106   case ISD::FP_TO_UINT:
11107     // LowerFP_TO_INT() can only handle f32 and f64.
11108     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11109         MVT::ppcf128)
11110       return;
11111     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11112     return;
11113   case ISD::TRUNCATE: {
11114     if (!N->getValueType(0).isVector())
11115       return;
11116     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11117     if (Lowered)
11118       Results.push_back(Lowered);
11119     return;
11120   }
11121   case ISD::FSHL:
11122   case ISD::FSHR:
11123     // Don't handle funnel shifts here.
11124     return;
11125   case ISD::BITCAST:
11126     // Don't handle bitcast here.
11127     return;
11128   case ISD::FP_EXTEND:
11129     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11130     if (Lowered)
11131       Results.push_back(Lowered);
11132     return;
11133   }
11134 }
11135 
11136 //===----------------------------------------------------------------------===//
11137 //  Other Lowering Code
11138 //===----------------------------------------------------------------------===//
11139 
11140 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11141   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11142   Function *Func = Intrinsic::getDeclaration(M, Id);
11143   return Builder.CreateCall(Func, {});
11144 }
11145 
11146 // The mappings for emitLeading/TrailingFence is taken from
11147 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11148 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11149                                                  Instruction *Inst,
11150                                                  AtomicOrdering Ord) const {
11151   if (Ord == AtomicOrdering::SequentiallyConsistent)
11152     return callIntrinsic(Builder, Intrinsic::ppc_sync);
11153   if (isReleaseOrStronger(Ord))
11154     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11155   return nullptr;
11156 }
11157 
11158 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11159                                                   Instruction *Inst,
11160                                                   AtomicOrdering Ord) const {
11161   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11162     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11163     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11164     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11165     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11166       return Builder.CreateCall(
11167           Intrinsic::getDeclaration(
11168               Builder.GetInsertBlock()->getParent()->getParent(),
11169               Intrinsic::ppc_cfence, {Inst->getType()}),
11170           {Inst});
11171     // FIXME: Can use isync for rmw operation.
11172     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11173   }
11174   return nullptr;
11175 }
11176 
11177 MachineBasicBlock *
11178 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11179                                     unsigned AtomicSize,
11180                                     unsigned BinOpcode,
11181                                     unsigned CmpOpcode,
11182                                     unsigned CmpPred) const {
11183   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11184   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11185 
11186   auto LoadMnemonic = PPC::LDARX;
11187   auto StoreMnemonic = PPC::STDCX;
11188   switch (AtomicSize) {
11189   default:
11190     llvm_unreachable("Unexpected size of atomic entity");
11191   case 1:
11192     LoadMnemonic = PPC::LBARX;
11193     StoreMnemonic = PPC::STBCX;
11194     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11195     break;
11196   case 2:
11197     LoadMnemonic = PPC::LHARX;
11198     StoreMnemonic = PPC::STHCX;
11199     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11200     break;
11201   case 4:
11202     LoadMnemonic = PPC::LWARX;
11203     StoreMnemonic = PPC::STWCX;
11204     break;
11205   case 8:
11206     LoadMnemonic = PPC::LDARX;
11207     StoreMnemonic = PPC::STDCX;
11208     break;
11209   }
11210 
11211   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11212   MachineFunction *F = BB->getParent();
11213   MachineFunction::iterator It = ++BB->getIterator();
11214 
11215   Register dest = MI.getOperand(0).getReg();
11216   Register ptrA = MI.getOperand(1).getReg();
11217   Register ptrB = MI.getOperand(2).getReg();
11218   Register incr = MI.getOperand(3).getReg();
11219   DebugLoc dl = MI.getDebugLoc();
11220 
11221   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11222   MachineBasicBlock *loop2MBB =
11223     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11224   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11225   F->insert(It, loopMBB);
11226   if (CmpOpcode)
11227     F->insert(It, loop2MBB);
11228   F->insert(It, exitMBB);
11229   exitMBB->splice(exitMBB->begin(), BB,
11230                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11231   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11232 
11233   MachineRegisterInfo &RegInfo = F->getRegInfo();
11234   Register TmpReg = (!BinOpcode) ? incr :
11235     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11236                                            : &PPC::GPRCRegClass);
11237 
11238   //  thisMBB:
11239   //   ...
11240   //   fallthrough --> loopMBB
11241   BB->addSuccessor(loopMBB);
11242 
11243   //  loopMBB:
11244   //   l[wd]arx dest, ptr
11245   //   add r0, dest, incr
11246   //   st[wd]cx. r0, ptr
11247   //   bne- loopMBB
11248   //   fallthrough --> exitMBB
11249 
11250   // For max/min...
11251   //  loopMBB:
11252   //   l[wd]arx dest, ptr
11253   //   cmpl?[wd] incr, dest
11254   //   bgt exitMBB
11255   //  loop2MBB:
11256   //   st[wd]cx. dest, ptr
11257   //   bne- loopMBB
11258   //   fallthrough --> exitMBB
11259 
11260   BB = loopMBB;
11261   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11262     .addReg(ptrA).addReg(ptrB);
11263   if (BinOpcode)
11264     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11265   if (CmpOpcode) {
11266     // Signed comparisons of byte or halfword values must be sign-extended.
11267     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11268       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11269       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11270               ExtReg).addReg(dest);
11271       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11272         .addReg(incr).addReg(ExtReg);
11273     } else
11274       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11275         .addReg(incr).addReg(dest);
11276 
11277     BuildMI(BB, dl, TII->get(PPC::BCC))
11278       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11279     BB->addSuccessor(loop2MBB);
11280     BB->addSuccessor(exitMBB);
11281     BB = loop2MBB;
11282   }
11283   BuildMI(BB, dl, TII->get(StoreMnemonic))
11284     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11285   BuildMI(BB, dl, TII->get(PPC::BCC))
11286     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11287   BB->addSuccessor(loopMBB);
11288   BB->addSuccessor(exitMBB);
11289 
11290   //  exitMBB:
11291   //   ...
11292   BB = exitMBB;
11293   return BB;
11294 }
11295 
11296 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11297     MachineInstr &MI, MachineBasicBlock *BB,
11298     bool is8bit, // operation
11299     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11300   // If we support part-word atomic mnemonics, just use them
11301   if (Subtarget.hasPartwordAtomics())
11302     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11303                             CmpPred);
11304 
11305   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11306   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11307   // In 64 bit mode we have to use 64 bits for addresses, even though the
11308   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
11309   // registers without caring whether they're 32 or 64, but here we're
11310   // doing actual arithmetic on the addresses.
11311   bool is64bit = Subtarget.isPPC64();
11312   bool isLittleEndian = Subtarget.isLittleEndian();
11313   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11314 
11315   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11316   MachineFunction *F = BB->getParent();
11317   MachineFunction::iterator It = ++BB->getIterator();
11318 
11319   Register dest = MI.getOperand(0).getReg();
11320   Register ptrA = MI.getOperand(1).getReg();
11321   Register ptrB = MI.getOperand(2).getReg();
11322   Register incr = MI.getOperand(3).getReg();
11323   DebugLoc dl = MI.getDebugLoc();
11324 
11325   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11326   MachineBasicBlock *loop2MBB =
11327       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11328   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11329   F->insert(It, loopMBB);
11330   if (CmpOpcode)
11331     F->insert(It, loop2MBB);
11332   F->insert(It, exitMBB);
11333   exitMBB->splice(exitMBB->begin(), BB,
11334                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11335   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11336 
11337   MachineRegisterInfo &RegInfo = F->getRegInfo();
11338   const TargetRegisterClass *RC =
11339       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11340   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11341 
11342   Register PtrReg = RegInfo.createVirtualRegister(RC);
11343   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11344   Register ShiftReg =
11345       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11346   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11347   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11348   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11349   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11350   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11351   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11352   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11353   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11354   Register Ptr1Reg;
11355   Register TmpReg =
11356       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11357 
11358   //  thisMBB:
11359   //   ...
11360   //   fallthrough --> loopMBB
11361   BB->addSuccessor(loopMBB);
11362 
11363   // The 4-byte load must be aligned, while a char or short may be
11364   // anywhere in the word.  Hence all this nasty bookkeeping code.
11365   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11366   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11367   //   xori shift, shift1, 24 [16]
11368   //   rlwinm ptr, ptr1, 0, 0, 29
11369   //   slw incr2, incr, shift
11370   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11371   //   slw mask, mask2, shift
11372   //  loopMBB:
11373   //   lwarx tmpDest, ptr
11374   //   add tmp, tmpDest, incr2
11375   //   andc tmp2, tmpDest, mask
11376   //   and tmp3, tmp, mask
11377   //   or tmp4, tmp3, tmp2
11378   //   stwcx. tmp4, ptr
11379   //   bne- loopMBB
11380   //   fallthrough --> exitMBB
11381   //   srw dest, tmpDest, shift
11382   if (ptrA != ZeroReg) {
11383     Ptr1Reg = RegInfo.createVirtualRegister(RC);
11384     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11385         .addReg(ptrA)
11386         .addReg(ptrB);
11387   } else {
11388     Ptr1Reg = ptrB;
11389   }
11390   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11391   // mode.
11392   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11393       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11394       .addImm(3)
11395       .addImm(27)
11396       .addImm(is8bit ? 28 : 27);
11397   if (!isLittleEndian)
11398     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11399         .addReg(Shift1Reg)
11400         .addImm(is8bit ? 24 : 16);
11401   if (is64bit)
11402     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11403         .addReg(Ptr1Reg)
11404         .addImm(0)
11405         .addImm(61);
11406   else
11407     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11408         .addReg(Ptr1Reg)
11409         .addImm(0)
11410         .addImm(0)
11411         .addImm(29);
11412   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11413   if (is8bit)
11414     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11415   else {
11416     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11417     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11418         .addReg(Mask3Reg)
11419         .addImm(65535);
11420   }
11421   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11422       .addReg(Mask2Reg)
11423       .addReg(ShiftReg);
11424 
11425   BB = loopMBB;
11426   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11427       .addReg(ZeroReg)
11428       .addReg(PtrReg);
11429   if (BinOpcode)
11430     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11431         .addReg(Incr2Reg)
11432         .addReg(TmpDestReg);
11433   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11434       .addReg(TmpDestReg)
11435       .addReg(MaskReg);
11436   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11437   if (CmpOpcode) {
11438     // For unsigned comparisons, we can directly compare the shifted values.
11439     // For signed comparisons we shift and sign extend.
11440     Register SReg = RegInfo.createVirtualRegister(GPRC);
11441     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11442         .addReg(TmpDestReg)
11443         .addReg(MaskReg);
11444     unsigned ValueReg = SReg;
11445     unsigned CmpReg = Incr2Reg;
11446     if (CmpOpcode == PPC::CMPW) {
11447       ValueReg = RegInfo.createVirtualRegister(GPRC);
11448       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11449           .addReg(SReg)
11450           .addReg(ShiftReg);
11451       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11452       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11453           .addReg(ValueReg);
11454       ValueReg = ValueSReg;
11455       CmpReg = incr;
11456     }
11457     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11458         .addReg(CmpReg)
11459         .addReg(ValueReg);
11460     BuildMI(BB, dl, TII->get(PPC::BCC))
11461         .addImm(CmpPred)
11462         .addReg(PPC::CR0)
11463         .addMBB(exitMBB);
11464     BB->addSuccessor(loop2MBB);
11465     BB->addSuccessor(exitMBB);
11466     BB = loop2MBB;
11467   }
11468   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11469   BuildMI(BB, dl, TII->get(PPC::STWCX))
11470       .addReg(Tmp4Reg)
11471       .addReg(ZeroReg)
11472       .addReg(PtrReg);
11473   BuildMI(BB, dl, TII->get(PPC::BCC))
11474       .addImm(PPC::PRED_NE)
11475       .addReg(PPC::CR0)
11476       .addMBB(loopMBB);
11477   BB->addSuccessor(loopMBB);
11478   BB->addSuccessor(exitMBB);
11479 
11480   //  exitMBB:
11481   //   ...
11482   BB = exitMBB;
11483   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11484       .addReg(TmpDestReg)
11485       .addReg(ShiftReg);
11486   return BB;
11487 }
11488 
11489 llvm::MachineBasicBlock *
11490 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11491                                     MachineBasicBlock *MBB) const {
11492   DebugLoc DL = MI.getDebugLoc();
11493   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11494   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11495 
11496   MachineFunction *MF = MBB->getParent();
11497   MachineRegisterInfo &MRI = MF->getRegInfo();
11498 
11499   const BasicBlock *BB = MBB->getBasicBlock();
11500   MachineFunction::iterator I = ++MBB->getIterator();
11501 
11502   Register DstReg = MI.getOperand(0).getReg();
11503   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11504   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11505   Register mainDstReg = MRI.createVirtualRegister(RC);
11506   Register restoreDstReg = MRI.createVirtualRegister(RC);
11507 
11508   MVT PVT = getPointerTy(MF->getDataLayout());
11509   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11510          "Invalid Pointer Size!");
11511   // For v = setjmp(buf), we generate
11512   //
11513   // thisMBB:
11514   //  SjLjSetup mainMBB
11515   //  bl mainMBB
11516   //  v_restore = 1
11517   //  b sinkMBB
11518   //
11519   // mainMBB:
11520   //  buf[LabelOffset] = LR
11521   //  v_main = 0
11522   //
11523   // sinkMBB:
11524   //  v = phi(main, restore)
11525   //
11526 
11527   MachineBasicBlock *thisMBB = MBB;
11528   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11529   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11530   MF->insert(I, mainMBB);
11531   MF->insert(I, sinkMBB);
11532 
11533   MachineInstrBuilder MIB;
11534 
11535   // Transfer the remainder of BB and its successor edges to sinkMBB.
11536   sinkMBB->splice(sinkMBB->begin(), MBB,
11537                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11538   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11539 
11540   // Note that the structure of the jmp_buf used here is not compatible
11541   // with that used by libc, and is not designed to be. Specifically, it
11542   // stores only those 'reserved' registers that LLVM does not otherwise
11543   // understand how to spill. Also, by convention, by the time this
11544   // intrinsic is called, Clang has already stored the frame address in the
11545   // first slot of the buffer and stack address in the third. Following the
11546   // X86 target code, we'll store the jump address in the second slot. We also
11547   // need to save the TOC pointer (R2) to handle jumps between shared
11548   // libraries, and that will be stored in the fourth slot. The thread
11549   // identifier (R13) is not affected.
11550 
11551   // thisMBB:
11552   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11553   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11554   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11555 
11556   // Prepare IP either in reg.
11557   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11558   Register LabelReg = MRI.createVirtualRegister(PtrRC);
11559   Register BufReg = MI.getOperand(1).getReg();
11560 
11561   if (Subtarget.is64BitELFABI()) {
11562     setUsesTOCBasePtr(*MBB->getParent());
11563     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11564               .addReg(PPC::X2)
11565               .addImm(TOCOffset)
11566               .addReg(BufReg)
11567               .cloneMemRefs(MI);
11568   }
11569 
11570   // Naked functions never have a base pointer, and so we use r1. For all
11571   // other functions, this decision must be delayed until during PEI.
11572   unsigned BaseReg;
11573   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11574     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11575   else
11576     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11577 
11578   MIB = BuildMI(*thisMBB, MI, DL,
11579                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11580             .addReg(BaseReg)
11581             .addImm(BPOffset)
11582             .addReg(BufReg)
11583             .cloneMemRefs(MI);
11584 
11585   // Setup
11586   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11587   MIB.addRegMask(TRI->getNoPreservedMask());
11588 
11589   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11590 
11591   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11592           .addMBB(mainMBB);
11593   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11594 
11595   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11596   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11597 
11598   // mainMBB:
11599   //  mainDstReg = 0
11600   MIB =
11601       BuildMI(mainMBB, DL,
11602               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11603 
11604   // Store IP
11605   if (Subtarget.isPPC64()) {
11606     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11607             .addReg(LabelReg)
11608             .addImm(LabelOffset)
11609             .addReg(BufReg);
11610   } else {
11611     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11612             .addReg(LabelReg)
11613             .addImm(LabelOffset)
11614             .addReg(BufReg);
11615   }
11616   MIB.cloneMemRefs(MI);
11617 
11618   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11619   mainMBB->addSuccessor(sinkMBB);
11620 
11621   // sinkMBB:
11622   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11623           TII->get(PPC::PHI), DstReg)
11624     .addReg(mainDstReg).addMBB(mainMBB)
11625     .addReg(restoreDstReg).addMBB(thisMBB);
11626 
11627   MI.eraseFromParent();
11628   return sinkMBB;
11629 }
11630 
11631 MachineBasicBlock *
11632 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11633                                      MachineBasicBlock *MBB) const {
11634   DebugLoc DL = MI.getDebugLoc();
11635   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11636 
11637   MachineFunction *MF = MBB->getParent();
11638   MachineRegisterInfo &MRI = MF->getRegInfo();
11639 
11640   MVT PVT = getPointerTy(MF->getDataLayout());
11641   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11642          "Invalid Pointer Size!");
11643 
11644   const TargetRegisterClass *RC =
11645     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11646   Register Tmp = MRI.createVirtualRegister(RC);
11647   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11648   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11649   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11650   unsigned BP =
11651       (PVT == MVT::i64)
11652           ? PPC::X30
11653           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11654                                                               : PPC::R30);
11655 
11656   MachineInstrBuilder MIB;
11657 
11658   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11659   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11660   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11661   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11662 
11663   Register BufReg = MI.getOperand(0).getReg();
11664 
11665   // Reload FP (the jumped-to function may not have had a
11666   // frame pointer, and if so, then its r31 will be restored
11667   // as necessary).
11668   if (PVT == MVT::i64) {
11669     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11670             .addImm(0)
11671             .addReg(BufReg);
11672   } else {
11673     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11674             .addImm(0)
11675             .addReg(BufReg);
11676   }
11677   MIB.cloneMemRefs(MI);
11678 
11679   // Reload IP
11680   if (PVT == MVT::i64) {
11681     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11682             .addImm(LabelOffset)
11683             .addReg(BufReg);
11684   } else {
11685     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11686             .addImm(LabelOffset)
11687             .addReg(BufReg);
11688   }
11689   MIB.cloneMemRefs(MI);
11690 
11691   // Reload SP
11692   if (PVT == MVT::i64) {
11693     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11694             .addImm(SPOffset)
11695             .addReg(BufReg);
11696   } else {
11697     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11698             .addImm(SPOffset)
11699             .addReg(BufReg);
11700   }
11701   MIB.cloneMemRefs(MI);
11702 
11703   // Reload BP
11704   if (PVT == MVT::i64) {
11705     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11706             .addImm(BPOffset)
11707             .addReg(BufReg);
11708   } else {
11709     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11710             .addImm(BPOffset)
11711             .addReg(BufReg);
11712   }
11713   MIB.cloneMemRefs(MI);
11714 
11715   // Reload TOC
11716   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11717     setUsesTOCBasePtr(*MBB->getParent());
11718     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11719               .addImm(TOCOffset)
11720               .addReg(BufReg)
11721               .cloneMemRefs(MI);
11722   }
11723 
11724   // Jump
11725   BuildMI(*MBB, MI, DL,
11726           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11727   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11728 
11729   MI.eraseFromParent();
11730   return MBB;
11731 }
11732 
11733 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11734   // If the function specifically requests inline stack probes, emit them.
11735   if (MF.getFunction().hasFnAttribute("probe-stack"))
11736     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11737            "inline-asm";
11738   return false;
11739 }
11740 
11741 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11742   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11743   unsigned StackAlign = TFI->getStackAlignment();
11744   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11745          "Unexpected stack alignment");
11746   // The default stack probe size is 4096 if the function has no
11747   // stack-probe-size attribute.
11748   unsigned StackProbeSize = 4096;
11749   const Function &Fn = MF.getFunction();
11750   if (Fn.hasFnAttribute("stack-probe-size"))
11751     Fn.getFnAttribute("stack-probe-size")
11752         .getValueAsString()
11753         .getAsInteger(0, StackProbeSize);
11754   // Round down to the stack alignment.
11755   StackProbeSize &= ~(StackAlign - 1);
11756   return StackProbeSize ? StackProbeSize : StackAlign;
11757 }
11758 
11759 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
11760 // into three phases. In the first phase, it uses pseudo instruction
11761 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11762 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11763 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
11764 // MaxCallFrameSize so that it can calculate correct data area pointer.
11765 MachineBasicBlock *
11766 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11767                                     MachineBasicBlock *MBB) const {
11768   const bool isPPC64 = Subtarget.isPPC64();
11769   MachineFunction *MF = MBB->getParent();
11770   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11771   DebugLoc DL = MI.getDebugLoc();
11772   const unsigned ProbeSize = getStackProbeSize(*MF);
11773   const BasicBlock *ProbedBB = MBB->getBasicBlock();
11774   MachineRegisterInfo &MRI = MF->getRegInfo();
11775   // The CFG of probing stack looks as
11776   //         +-----+
11777   //         | MBB |
11778   //         +--+--+
11779   //            |
11780   //       +----v----+
11781   //  +--->+ TestMBB +---+
11782   //  |    +----+----+   |
11783   //  |         |        |
11784   //  |   +-----v----+   |
11785   //  +---+ BlockMBB |   |
11786   //      +----------+   |
11787   //                     |
11788   //       +---------+   |
11789   //       | TailMBB +<--+
11790   //       +---------+
11791   // In MBB, calculate previous frame pointer and final stack pointer.
11792   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11793   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11794   // TailMBB is spliced via \p MI.
11795   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11796   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11797   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11798 
11799   MachineFunction::iterator MBBIter = ++MBB->getIterator();
11800   MF->insert(MBBIter, TestMBB);
11801   MF->insert(MBBIter, BlockMBB);
11802   MF->insert(MBBIter, TailMBB);
11803 
11804   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11805   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11806 
11807   Register DstReg = MI.getOperand(0).getReg();
11808   Register NegSizeReg = MI.getOperand(1).getReg();
11809   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11810   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11811   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11812   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11813 
11814   // Since value of NegSizeReg might be realigned in prologepilog, insert a
11815   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
11816   // NegSize.
11817   unsigned ProbeOpc;
11818   if (!MRI.hasOneNonDBGUse(NegSizeReg))
11819     ProbeOpc =
11820         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11821   else
11822     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
11823     // and NegSizeReg will be allocated in the same phyreg to avoid
11824     // redundant copy when NegSizeReg has only one use which is current MI and
11825     // will be replaced by PREPARE_PROBED_ALLOCA then.
11826     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11827                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11828   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11829       .addDef(ActualNegSizeReg)
11830       .addReg(NegSizeReg)
11831       .add(MI.getOperand(2))
11832       .add(MI.getOperand(3));
11833 
11834   // Calculate final stack pointer, which equals to SP + ActualNegSize.
11835   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11836           FinalStackPtr)
11837       .addReg(SPReg)
11838       .addReg(ActualNegSizeReg);
11839 
11840   // Materialize a scratch register for update.
11841   int64_t NegProbeSize = -(int64_t)ProbeSize;
11842   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11843   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11844   if (!isInt<16>(NegProbeSize)) {
11845     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11846     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11847         .addImm(NegProbeSize >> 16);
11848     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11849             ScratchReg)
11850         .addReg(TempReg)
11851         .addImm(NegProbeSize & 0xFFFF);
11852   } else
11853     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11854         .addImm(NegProbeSize);
11855 
11856   {
11857     // Probing leading residual part.
11858     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11859     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11860         .addReg(ActualNegSizeReg)
11861         .addReg(ScratchReg);
11862     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11863     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11864         .addReg(Div)
11865         .addReg(ScratchReg);
11866     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11867     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11868         .addReg(Mul)
11869         .addReg(ActualNegSizeReg);
11870     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11871         .addReg(FramePointer)
11872         .addReg(SPReg)
11873         .addReg(NegMod);
11874   }
11875 
11876   {
11877     // Remaining part should be multiple of ProbeSize.
11878     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11879     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11880         .addReg(SPReg)
11881         .addReg(FinalStackPtr);
11882     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11883         .addImm(PPC::PRED_EQ)
11884         .addReg(CmpResult)
11885         .addMBB(TailMBB);
11886     TestMBB->addSuccessor(BlockMBB);
11887     TestMBB->addSuccessor(TailMBB);
11888   }
11889 
11890   {
11891     // Touch the block.
11892     // |P...|P...|P...
11893     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11894         .addReg(FramePointer)
11895         .addReg(SPReg)
11896         .addReg(ScratchReg);
11897     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11898     BlockMBB->addSuccessor(TestMBB);
11899   }
11900 
11901   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
11902   // DYNAREAOFFSET pseudo instruction to get the future result.
11903   Register MaxCallFrameSizeReg =
11904       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11905   BuildMI(TailMBB, DL,
11906           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11907           MaxCallFrameSizeReg)
11908       .add(MI.getOperand(2))
11909       .add(MI.getOperand(3));
11910   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11911       .addReg(SPReg)
11912       .addReg(MaxCallFrameSizeReg);
11913 
11914   // Splice instructions after MI to TailMBB.
11915   TailMBB->splice(TailMBB->end(), MBB,
11916                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11917   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11918   MBB->addSuccessor(TestMBB);
11919 
11920   // Delete the pseudo instruction.
11921   MI.eraseFromParent();
11922 
11923   ++NumDynamicAllocaProbed;
11924   return TailMBB;
11925 }
11926 
11927 MachineBasicBlock *
11928 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11929                                                MachineBasicBlock *BB) const {
11930   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11931       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11932     if (Subtarget.is64BitELFABI() &&
11933         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11934         !Subtarget.isUsingPCRelativeCalls()) {
11935       // Call lowering should have added an r2 operand to indicate a dependence
11936       // on the TOC base pointer value. It can't however, because there is no
11937       // way to mark the dependence as implicit there, and so the stackmap code
11938       // will confuse it with a regular operand. Instead, add the dependence
11939       // here.
11940       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11941     }
11942 
11943     return emitPatchPoint(MI, BB);
11944   }
11945 
11946   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11947       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11948     return emitEHSjLjSetJmp(MI, BB);
11949   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11950              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11951     return emitEHSjLjLongJmp(MI, BB);
11952   }
11953 
11954   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11955 
11956   // To "insert" these instructions we actually have to insert their
11957   // control-flow patterns.
11958   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11959   MachineFunction::iterator It = ++BB->getIterator();
11960 
11961   MachineFunction *F = BB->getParent();
11962 
11963   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11964       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11965       MI.getOpcode() == PPC::SELECT_I8) {
11966     SmallVector<MachineOperand, 2> Cond;
11967     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11968         MI.getOpcode() == PPC::SELECT_CC_I8)
11969       Cond.push_back(MI.getOperand(4));
11970     else
11971       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11972     Cond.push_back(MI.getOperand(1));
11973 
11974     DebugLoc dl = MI.getDebugLoc();
11975     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11976                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11977   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11978              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11979              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11980              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11981              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11982              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11983              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11984              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11985              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11986              MI.getOpcode() == PPC::SELECT_F4 ||
11987              MI.getOpcode() == PPC::SELECT_F8 ||
11988              MI.getOpcode() == PPC::SELECT_F16 ||
11989              MI.getOpcode() == PPC::SELECT_SPE ||
11990              MI.getOpcode() == PPC::SELECT_SPE4 ||
11991              MI.getOpcode() == PPC::SELECT_VRRC ||
11992              MI.getOpcode() == PPC::SELECT_VSFRC ||
11993              MI.getOpcode() == PPC::SELECT_VSSRC ||
11994              MI.getOpcode() == PPC::SELECT_VSRC) {
11995     // The incoming instruction knows the destination vreg to set, the
11996     // condition code register to branch on, the true/false values to
11997     // select between, and a branch opcode to use.
11998 
11999     //  thisMBB:
12000     //  ...
12001     //   TrueVal = ...
12002     //   cmpTY ccX, r1, r2
12003     //   bCC copy1MBB
12004     //   fallthrough --> copy0MBB
12005     MachineBasicBlock *thisMBB = BB;
12006     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12007     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12008     DebugLoc dl = MI.getDebugLoc();
12009     F->insert(It, copy0MBB);
12010     F->insert(It, sinkMBB);
12011 
12012     // Transfer the remainder of BB and its successor edges to sinkMBB.
12013     sinkMBB->splice(sinkMBB->begin(), BB,
12014                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12015     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12016 
12017     // Next, add the true and fallthrough blocks as its successors.
12018     BB->addSuccessor(copy0MBB);
12019     BB->addSuccessor(sinkMBB);
12020 
12021     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12022         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12023         MI.getOpcode() == PPC::SELECT_F16 ||
12024         MI.getOpcode() == PPC::SELECT_SPE4 ||
12025         MI.getOpcode() == PPC::SELECT_SPE ||
12026         MI.getOpcode() == PPC::SELECT_VRRC ||
12027         MI.getOpcode() == PPC::SELECT_VSFRC ||
12028         MI.getOpcode() == PPC::SELECT_VSSRC ||
12029         MI.getOpcode() == PPC::SELECT_VSRC) {
12030       BuildMI(BB, dl, TII->get(PPC::BC))
12031           .addReg(MI.getOperand(1).getReg())
12032           .addMBB(sinkMBB);
12033     } else {
12034       unsigned SelectPred = MI.getOperand(4).getImm();
12035       BuildMI(BB, dl, TII->get(PPC::BCC))
12036           .addImm(SelectPred)
12037           .addReg(MI.getOperand(1).getReg())
12038           .addMBB(sinkMBB);
12039     }
12040 
12041     //  copy0MBB:
12042     //   %FalseValue = ...
12043     //   # fallthrough to sinkMBB
12044     BB = copy0MBB;
12045 
12046     // Update machine-CFG edges
12047     BB->addSuccessor(sinkMBB);
12048 
12049     //  sinkMBB:
12050     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12051     //  ...
12052     BB = sinkMBB;
12053     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12054         .addReg(MI.getOperand(3).getReg())
12055         .addMBB(copy0MBB)
12056         .addReg(MI.getOperand(2).getReg())
12057         .addMBB(thisMBB);
12058   } else if (MI.getOpcode() == PPC::ReadTB) {
12059     // To read the 64-bit time-base register on a 32-bit target, we read the
12060     // two halves. Should the counter have wrapped while it was being read, we
12061     // need to try again.
12062     // ...
12063     // readLoop:
12064     // mfspr Rx,TBU # load from TBU
12065     // mfspr Ry,TB  # load from TB
12066     // mfspr Rz,TBU # load from TBU
12067     // cmpw crX,Rx,Rz # check if 'old'='new'
12068     // bne readLoop   # branch if they're not equal
12069     // ...
12070 
12071     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12072     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12073     DebugLoc dl = MI.getDebugLoc();
12074     F->insert(It, readMBB);
12075     F->insert(It, sinkMBB);
12076 
12077     // Transfer the remainder of BB and its successor edges to sinkMBB.
12078     sinkMBB->splice(sinkMBB->begin(), BB,
12079                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12080     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12081 
12082     BB->addSuccessor(readMBB);
12083     BB = readMBB;
12084 
12085     MachineRegisterInfo &RegInfo = F->getRegInfo();
12086     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12087     Register LoReg = MI.getOperand(0).getReg();
12088     Register HiReg = MI.getOperand(1).getReg();
12089 
12090     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12091     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12092     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12093 
12094     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12095 
12096     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12097         .addReg(HiReg)
12098         .addReg(ReadAgainReg);
12099     BuildMI(BB, dl, TII->get(PPC::BCC))
12100         .addImm(PPC::PRED_NE)
12101         .addReg(CmpReg)
12102         .addMBB(readMBB);
12103 
12104     BB->addSuccessor(readMBB);
12105     BB->addSuccessor(sinkMBB);
12106   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12107     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12108   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12109     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12110   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12111     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12112   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12113     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12114 
12115   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12116     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12117   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12118     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12119   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12120     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12121   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12122     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12123 
12124   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12125     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12126   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12127     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12128   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12129     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12130   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12131     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12132 
12133   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12134     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12135   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12136     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12137   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12138     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12139   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12140     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12141 
12142   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12143     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12144   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12145     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12146   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12147     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12148   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12149     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12150 
12151   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12152     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12153   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12154     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12155   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12156     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12157   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12158     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12159 
12160   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12161     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12162   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12163     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12164   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12165     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12166   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12167     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12168 
12169   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12170     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12171   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12172     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12173   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12174     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12175   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12176     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12177 
12178   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12179     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12180   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12181     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12182   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12183     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12184   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12185     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12186 
12187   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12188     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12189   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12190     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12191   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12192     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12193   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12194     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12195 
12196   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12197     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12198   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12199     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12200   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12201     BB = EmitAtomicBinary(MI, BB, 4, 0);
12202   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12203     BB = EmitAtomicBinary(MI, BB, 8, 0);
12204   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12205            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12206            (Subtarget.hasPartwordAtomics() &&
12207             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12208            (Subtarget.hasPartwordAtomics() &&
12209             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12210     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12211 
12212     auto LoadMnemonic = PPC::LDARX;
12213     auto StoreMnemonic = PPC::STDCX;
12214     switch (MI.getOpcode()) {
12215     default:
12216       llvm_unreachable("Compare and swap of unknown size");
12217     case PPC::ATOMIC_CMP_SWAP_I8:
12218       LoadMnemonic = PPC::LBARX;
12219       StoreMnemonic = PPC::STBCX;
12220       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12221       break;
12222     case PPC::ATOMIC_CMP_SWAP_I16:
12223       LoadMnemonic = PPC::LHARX;
12224       StoreMnemonic = PPC::STHCX;
12225       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12226       break;
12227     case PPC::ATOMIC_CMP_SWAP_I32:
12228       LoadMnemonic = PPC::LWARX;
12229       StoreMnemonic = PPC::STWCX;
12230       break;
12231     case PPC::ATOMIC_CMP_SWAP_I64:
12232       LoadMnemonic = PPC::LDARX;
12233       StoreMnemonic = PPC::STDCX;
12234       break;
12235     }
12236     Register dest = MI.getOperand(0).getReg();
12237     Register ptrA = MI.getOperand(1).getReg();
12238     Register ptrB = MI.getOperand(2).getReg();
12239     Register oldval = MI.getOperand(3).getReg();
12240     Register newval = MI.getOperand(4).getReg();
12241     DebugLoc dl = MI.getDebugLoc();
12242 
12243     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12244     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12245     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12246     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12247     F->insert(It, loop1MBB);
12248     F->insert(It, loop2MBB);
12249     F->insert(It, midMBB);
12250     F->insert(It, exitMBB);
12251     exitMBB->splice(exitMBB->begin(), BB,
12252                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12253     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12254 
12255     //  thisMBB:
12256     //   ...
12257     //   fallthrough --> loopMBB
12258     BB->addSuccessor(loop1MBB);
12259 
12260     // loop1MBB:
12261     //   l[bhwd]arx dest, ptr
12262     //   cmp[wd] dest, oldval
12263     //   bne- midMBB
12264     // loop2MBB:
12265     //   st[bhwd]cx. newval, ptr
12266     //   bne- loopMBB
12267     //   b exitBB
12268     // midMBB:
12269     //   st[bhwd]cx. dest, ptr
12270     // exitBB:
12271     BB = loop1MBB;
12272     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12273     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12274         .addReg(oldval)
12275         .addReg(dest);
12276     BuildMI(BB, dl, TII->get(PPC::BCC))
12277         .addImm(PPC::PRED_NE)
12278         .addReg(PPC::CR0)
12279         .addMBB(midMBB);
12280     BB->addSuccessor(loop2MBB);
12281     BB->addSuccessor(midMBB);
12282 
12283     BB = loop2MBB;
12284     BuildMI(BB, dl, TII->get(StoreMnemonic))
12285         .addReg(newval)
12286         .addReg(ptrA)
12287         .addReg(ptrB);
12288     BuildMI(BB, dl, TII->get(PPC::BCC))
12289         .addImm(PPC::PRED_NE)
12290         .addReg(PPC::CR0)
12291         .addMBB(loop1MBB);
12292     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12293     BB->addSuccessor(loop1MBB);
12294     BB->addSuccessor(exitMBB);
12295 
12296     BB = midMBB;
12297     BuildMI(BB, dl, TII->get(StoreMnemonic))
12298         .addReg(dest)
12299         .addReg(ptrA)
12300         .addReg(ptrB);
12301     BB->addSuccessor(exitMBB);
12302 
12303     //  exitMBB:
12304     //   ...
12305     BB = exitMBB;
12306   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12307              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12308     // We must use 64-bit registers for addresses when targeting 64-bit,
12309     // since we're actually doing arithmetic on them.  Other registers
12310     // can be 32-bit.
12311     bool is64bit = Subtarget.isPPC64();
12312     bool isLittleEndian = Subtarget.isLittleEndian();
12313     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12314 
12315     Register dest = MI.getOperand(0).getReg();
12316     Register ptrA = MI.getOperand(1).getReg();
12317     Register ptrB = MI.getOperand(2).getReg();
12318     Register oldval = MI.getOperand(3).getReg();
12319     Register newval = MI.getOperand(4).getReg();
12320     DebugLoc dl = MI.getDebugLoc();
12321 
12322     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12323     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12324     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12325     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12326     F->insert(It, loop1MBB);
12327     F->insert(It, loop2MBB);
12328     F->insert(It, midMBB);
12329     F->insert(It, exitMBB);
12330     exitMBB->splice(exitMBB->begin(), BB,
12331                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12332     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12333 
12334     MachineRegisterInfo &RegInfo = F->getRegInfo();
12335     const TargetRegisterClass *RC =
12336         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12337     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12338 
12339     Register PtrReg = RegInfo.createVirtualRegister(RC);
12340     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12341     Register ShiftReg =
12342         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12343     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12344     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12345     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12346     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12347     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12348     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12349     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12350     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12351     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12352     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12353     Register Ptr1Reg;
12354     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12355     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12356     //  thisMBB:
12357     //   ...
12358     //   fallthrough --> loopMBB
12359     BB->addSuccessor(loop1MBB);
12360 
12361     // The 4-byte load must be aligned, while a char or short may be
12362     // anywhere in the word.  Hence all this nasty bookkeeping code.
12363     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12364     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12365     //   xori shift, shift1, 24 [16]
12366     //   rlwinm ptr, ptr1, 0, 0, 29
12367     //   slw newval2, newval, shift
12368     //   slw oldval2, oldval,shift
12369     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12370     //   slw mask, mask2, shift
12371     //   and newval3, newval2, mask
12372     //   and oldval3, oldval2, mask
12373     // loop1MBB:
12374     //   lwarx tmpDest, ptr
12375     //   and tmp, tmpDest, mask
12376     //   cmpw tmp, oldval3
12377     //   bne- midMBB
12378     // loop2MBB:
12379     //   andc tmp2, tmpDest, mask
12380     //   or tmp4, tmp2, newval3
12381     //   stwcx. tmp4, ptr
12382     //   bne- loop1MBB
12383     //   b exitBB
12384     // midMBB:
12385     //   stwcx. tmpDest, ptr
12386     // exitBB:
12387     //   srw dest, tmpDest, shift
12388     if (ptrA != ZeroReg) {
12389       Ptr1Reg = RegInfo.createVirtualRegister(RC);
12390       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12391           .addReg(ptrA)
12392           .addReg(ptrB);
12393     } else {
12394       Ptr1Reg = ptrB;
12395     }
12396 
12397     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12398     // mode.
12399     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12400         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12401         .addImm(3)
12402         .addImm(27)
12403         .addImm(is8bit ? 28 : 27);
12404     if (!isLittleEndian)
12405       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12406           .addReg(Shift1Reg)
12407           .addImm(is8bit ? 24 : 16);
12408     if (is64bit)
12409       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12410           .addReg(Ptr1Reg)
12411           .addImm(0)
12412           .addImm(61);
12413     else
12414       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12415           .addReg(Ptr1Reg)
12416           .addImm(0)
12417           .addImm(0)
12418           .addImm(29);
12419     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12420         .addReg(newval)
12421         .addReg(ShiftReg);
12422     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12423         .addReg(oldval)
12424         .addReg(ShiftReg);
12425     if (is8bit)
12426       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12427     else {
12428       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12429       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12430           .addReg(Mask3Reg)
12431           .addImm(65535);
12432     }
12433     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12434         .addReg(Mask2Reg)
12435         .addReg(ShiftReg);
12436     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12437         .addReg(NewVal2Reg)
12438         .addReg(MaskReg);
12439     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12440         .addReg(OldVal2Reg)
12441         .addReg(MaskReg);
12442 
12443     BB = loop1MBB;
12444     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12445         .addReg(ZeroReg)
12446         .addReg(PtrReg);
12447     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12448         .addReg(TmpDestReg)
12449         .addReg(MaskReg);
12450     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12451         .addReg(TmpReg)
12452         .addReg(OldVal3Reg);
12453     BuildMI(BB, dl, TII->get(PPC::BCC))
12454         .addImm(PPC::PRED_NE)
12455         .addReg(PPC::CR0)
12456         .addMBB(midMBB);
12457     BB->addSuccessor(loop2MBB);
12458     BB->addSuccessor(midMBB);
12459 
12460     BB = loop2MBB;
12461     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12462         .addReg(TmpDestReg)
12463         .addReg(MaskReg);
12464     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12465         .addReg(Tmp2Reg)
12466         .addReg(NewVal3Reg);
12467     BuildMI(BB, dl, TII->get(PPC::STWCX))
12468         .addReg(Tmp4Reg)
12469         .addReg(ZeroReg)
12470         .addReg(PtrReg);
12471     BuildMI(BB, dl, TII->get(PPC::BCC))
12472         .addImm(PPC::PRED_NE)
12473         .addReg(PPC::CR0)
12474         .addMBB(loop1MBB);
12475     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12476     BB->addSuccessor(loop1MBB);
12477     BB->addSuccessor(exitMBB);
12478 
12479     BB = midMBB;
12480     BuildMI(BB, dl, TII->get(PPC::STWCX))
12481         .addReg(TmpDestReg)
12482         .addReg(ZeroReg)
12483         .addReg(PtrReg);
12484     BB->addSuccessor(exitMBB);
12485 
12486     //  exitMBB:
12487     //   ...
12488     BB = exitMBB;
12489     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12490         .addReg(TmpReg)
12491         .addReg(ShiftReg);
12492   } else if (MI.getOpcode() == PPC::FADDrtz) {
12493     // This pseudo performs an FADD with rounding mode temporarily forced
12494     // to round-to-zero.  We emit this via custom inserter since the FPSCR
12495     // is not modeled at the SelectionDAG level.
12496     Register Dest = MI.getOperand(0).getReg();
12497     Register Src1 = MI.getOperand(1).getReg();
12498     Register Src2 = MI.getOperand(2).getReg();
12499     DebugLoc dl = MI.getDebugLoc();
12500 
12501     MachineRegisterInfo &RegInfo = F->getRegInfo();
12502     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12503 
12504     // Save FPSCR value.
12505     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12506 
12507     // Set rounding mode to round-to-zero.
12508     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12509         .addImm(31)
12510         .addReg(PPC::RM, RegState::ImplicitDefine);
12511 
12512     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12513         .addImm(30)
12514         .addReg(PPC::RM, RegState::ImplicitDefine);
12515 
12516     // Perform addition.
12517     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12518                    .addReg(Src1)
12519                    .addReg(Src2);
12520     if (MI.getFlag(MachineInstr::NoFPExcept))
12521       MIB.setMIFlag(MachineInstr::NoFPExcept);
12522 
12523     // Restore FPSCR value.
12524     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12525   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12526              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12527              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12528              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12529     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12530                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12531                           ? PPC::ANDI8_rec
12532                           : PPC::ANDI_rec;
12533     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12534                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12535 
12536     MachineRegisterInfo &RegInfo = F->getRegInfo();
12537     Register Dest = RegInfo.createVirtualRegister(
12538         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12539 
12540     DebugLoc Dl = MI.getDebugLoc();
12541     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12542         .addReg(MI.getOperand(1).getReg())
12543         .addImm(1);
12544     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12545             MI.getOperand(0).getReg())
12546         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12547   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12548     DebugLoc Dl = MI.getDebugLoc();
12549     MachineRegisterInfo &RegInfo = F->getRegInfo();
12550     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12551     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12552     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12553             MI.getOperand(0).getReg())
12554         .addReg(CRReg);
12555   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12556     DebugLoc Dl = MI.getDebugLoc();
12557     unsigned Imm = MI.getOperand(1).getImm();
12558     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12559     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12560             MI.getOperand(0).getReg())
12561         .addReg(PPC::CR0EQ);
12562   } else if (MI.getOpcode() == PPC::SETRNDi) {
12563     DebugLoc dl = MI.getDebugLoc();
12564     Register OldFPSCRReg = MI.getOperand(0).getReg();
12565 
12566     // Save FPSCR value.
12567     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12568 
12569     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12570     // the following settings:
12571     //   00 Round to nearest
12572     //   01 Round to 0
12573     //   10 Round to +inf
12574     //   11 Round to -inf
12575 
12576     // When the operand is immediate, using the two least significant bits of
12577     // the immediate to set the bits 62:63 of FPSCR.
12578     unsigned Mode = MI.getOperand(1).getImm();
12579     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12580         .addImm(31)
12581         .addReg(PPC::RM, RegState::ImplicitDefine);
12582 
12583     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12584         .addImm(30)
12585         .addReg(PPC::RM, RegState::ImplicitDefine);
12586   } else if (MI.getOpcode() == PPC::SETRND) {
12587     DebugLoc dl = MI.getDebugLoc();
12588 
12589     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12590     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12591     // If the target doesn't have DirectMove, we should use stack to do the
12592     // conversion, because the target doesn't have the instructions like mtvsrd
12593     // or mfvsrd to do this conversion directly.
12594     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12595       if (Subtarget.hasDirectMove()) {
12596         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12597           .addReg(SrcReg);
12598       } else {
12599         // Use stack to do the register copy.
12600         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12601         MachineRegisterInfo &RegInfo = F->getRegInfo();
12602         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12603         if (RC == &PPC::F8RCRegClass) {
12604           // Copy register from F8RCRegClass to G8RCRegclass.
12605           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12606                  "Unsupported RegClass.");
12607 
12608           StoreOp = PPC::STFD;
12609           LoadOp = PPC::LD;
12610         } else {
12611           // Copy register from G8RCRegClass to F8RCRegclass.
12612           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12613                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12614                  "Unsupported RegClass.");
12615         }
12616 
12617         MachineFrameInfo &MFI = F->getFrameInfo();
12618         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12619 
12620         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12621             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12622             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12623             MFI.getObjectAlign(FrameIdx));
12624 
12625         // Store the SrcReg into the stack.
12626         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12627           .addReg(SrcReg)
12628           .addImm(0)
12629           .addFrameIndex(FrameIdx)
12630           .addMemOperand(MMOStore);
12631 
12632         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12633             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12634             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12635             MFI.getObjectAlign(FrameIdx));
12636 
12637         // Load from the stack where SrcReg is stored, and save to DestReg,
12638         // so we have done the RegClass conversion from RegClass::SrcReg to
12639         // RegClass::DestReg.
12640         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12641           .addImm(0)
12642           .addFrameIndex(FrameIdx)
12643           .addMemOperand(MMOLoad);
12644       }
12645     };
12646 
12647     Register OldFPSCRReg = MI.getOperand(0).getReg();
12648 
12649     // Save FPSCR value.
12650     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12651 
12652     // When the operand is gprc register, use two least significant bits of the
12653     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12654     //
12655     // copy OldFPSCRTmpReg, OldFPSCRReg
12656     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12657     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12658     // copy NewFPSCRReg, NewFPSCRTmpReg
12659     // mtfsf 255, NewFPSCRReg
12660     MachineOperand SrcOp = MI.getOperand(1);
12661     MachineRegisterInfo &RegInfo = F->getRegInfo();
12662     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12663 
12664     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12665 
12666     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12667     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12668 
12669     // The first operand of INSERT_SUBREG should be a register which has
12670     // subregisters, we only care about its RegClass, so we should use an
12671     // IMPLICIT_DEF register.
12672     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12673     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12674       .addReg(ImDefReg)
12675       .add(SrcOp)
12676       .addImm(1);
12677 
12678     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12679     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12680       .addReg(OldFPSCRTmpReg)
12681       .addReg(ExtSrcReg)
12682       .addImm(0)
12683       .addImm(62);
12684 
12685     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12686     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12687 
12688     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12689     // bits of FPSCR.
12690     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12691       .addImm(255)
12692       .addReg(NewFPSCRReg)
12693       .addImm(0)
12694       .addImm(0);
12695   } else if (MI.getOpcode() == PPC::SETFLM) {
12696     DebugLoc Dl = MI.getDebugLoc();
12697 
12698     // Result of setflm is previous FPSCR content, so we need to save it first.
12699     Register OldFPSCRReg = MI.getOperand(0).getReg();
12700     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12701 
12702     // Put bits in 32:63 to FPSCR.
12703     Register NewFPSCRReg = MI.getOperand(1).getReg();
12704     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12705         .addImm(255)
12706         .addReg(NewFPSCRReg)
12707         .addImm(0)
12708         .addImm(0);
12709   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12710              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12711     return emitProbedAlloca(MI, BB);
12712   } else {
12713     llvm_unreachable("Unexpected instr type to insert");
12714   }
12715 
12716   MI.eraseFromParent(); // The pseudo instruction is gone now.
12717   return BB;
12718 }
12719 
12720 //===----------------------------------------------------------------------===//
12721 // Target Optimization Hooks
12722 //===----------------------------------------------------------------------===//
12723 
12724 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12725   // For the estimates, convergence is quadratic, so we essentially double the
12726   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12727   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12728   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12729   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12730   if (VT.getScalarType() == MVT::f64)
12731     RefinementSteps++;
12732   return RefinementSteps;
12733 }
12734 
12735 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12736                                            int Enabled, int &RefinementSteps,
12737                                            bool &UseOneConstNR,
12738                                            bool Reciprocal) const {
12739   EVT VT = Operand.getValueType();
12740   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12741       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12742       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12743       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12744     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12745       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12746 
12747     // The Newton-Raphson computation with a single constant does not provide
12748     // enough accuracy on some CPUs.
12749     UseOneConstNR = !Subtarget.needsTwoConstNR();
12750     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12751   }
12752   return SDValue();
12753 }
12754 
12755 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12756                                             int Enabled,
12757                                             int &RefinementSteps) const {
12758   EVT VT = Operand.getValueType();
12759   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12760       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12761       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12762       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12763     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12764       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12765     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12766   }
12767   return SDValue();
12768 }
12769 
12770 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12771   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12772   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12773   // enabled for division), this functionality is redundant with the default
12774   // combiner logic (once the division -> reciprocal/multiply transformation
12775   // has taken place). As a result, this matters more for older cores than for
12776   // newer ones.
12777 
12778   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12779   // reciprocal if there are two or more FDIVs (for embedded cores with only
12780   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12781   switch (Subtarget.getCPUDirective()) {
12782   default:
12783     return 3;
12784   case PPC::DIR_440:
12785   case PPC::DIR_A2:
12786   case PPC::DIR_E500:
12787   case PPC::DIR_E500mc:
12788   case PPC::DIR_E5500:
12789     return 2;
12790   }
12791 }
12792 
12793 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12794 // collapsed, and so we need to look through chains of them.
12795 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12796                                      int64_t& Offset, SelectionDAG &DAG) {
12797   if (DAG.isBaseWithConstantOffset(Loc)) {
12798     Base = Loc.getOperand(0);
12799     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12800 
12801     // The base might itself be a base plus an offset, and if so, accumulate
12802     // that as well.
12803     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12804   }
12805 }
12806 
12807 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12808                             unsigned Bytes, int Dist,
12809                             SelectionDAG &DAG) {
12810   if (VT.getSizeInBits() / 8 != Bytes)
12811     return false;
12812 
12813   SDValue BaseLoc = Base->getBasePtr();
12814   if (Loc.getOpcode() == ISD::FrameIndex) {
12815     if (BaseLoc.getOpcode() != ISD::FrameIndex)
12816       return false;
12817     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12818     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
12819     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12820     int FS  = MFI.getObjectSize(FI);
12821     int BFS = MFI.getObjectSize(BFI);
12822     if (FS != BFS || FS != (int)Bytes) return false;
12823     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12824   }
12825 
12826   SDValue Base1 = Loc, Base2 = BaseLoc;
12827   int64_t Offset1 = 0, Offset2 = 0;
12828   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12829   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12830   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12831     return true;
12832 
12833   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12834   const GlobalValue *GV1 = nullptr;
12835   const GlobalValue *GV2 = nullptr;
12836   Offset1 = 0;
12837   Offset2 = 0;
12838   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12839   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12840   if (isGA1 && isGA2 && GV1 == GV2)
12841     return Offset1 == (Offset2 + Dist*Bytes);
12842   return false;
12843 }
12844 
12845 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12846 // not enforce equality of the chain operands.
12847 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12848                             unsigned Bytes, int Dist,
12849                             SelectionDAG &DAG) {
12850   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12851     EVT VT = LS->getMemoryVT();
12852     SDValue Loc = LS->getBasePtr();
12853     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12854   }
12855 
12856   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12857     EVT VT;
12858     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12859     default: return false;
12860     case Intrinsic::ppc_altivec_lvx:
12861     case Intrinsic::ppc_altivec_lvxl:
12862     case Intrinsic::ppc_vsx_lxvw4x:
12863     case Intrinsic::ppc_vsx_lxvw4x_be:
12864       VT = MVT::v4i32;
12865       break;
12866     case Intrinsic::ppc_vsx_lxvd2x:
12867     case Intrinsic::ppc_vsx_lxvd2x_be:
12868       VT = MVT::v2f64;
12869       break;
12870     case Intrinsic::ppc_altivec_lvebx:
12871       VT = MVT::i8;
12872       break;
12873     case Intrinsic::ppc_altivec_lvehx:
12874       VT = MVT::i16;
12875       break;
12876     case Intrinsic::ppc_altivec_lvewx:
12877       VT = MVT::i32;
12878       break;
12879     }
12880 
12881     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12882   }
12883 
12884   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12885     EVT VT;
12886     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12887     default: return false;
12888     case Intrinsic::ppc_altivec_stvx:
12889     case Intrinsic::ppc_altivec_stvxl:
12890     case Intrinsic::ppc_vsx_stxvw4x:
12891       VT = MVT::v4i32;
12892       break;
12893     case Intrinsic::ppc_vsx_stxvd2x:
12894       VT = MVT::v2f64;
12895       break;
12896     case Intrinsic::ppc_vsx_stxvw4x_be:
12897       VT = MVT::v4i32;
12898       break;
12899     case Intrinsic::ppc_vsx_stxvd2x_be:
12900       VT = MVT::v2f64;
12901       break;
12902     case Intrinsic::ppc_altivec_stvebx:
12903       VT = MVT::i8;
12904       break;
12905     case Intrinsic::ppc_altivec_stvehx:
12906       VT = MVT::i16;
12907       break;
12908     case Intrinsic::ppc_altivec_stvewx:
12909       VT = MVT::i32;
12910       break;
12911     }
12912 
12913     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12914   }
12915 
12916   return false;
12917 }
12918 
12919 // Return true is there is a nearyby consecutive load to the one provided
12920 // (regardless of alignment). We search up and down the chain, looking though
12921 // token factors and other loads (but nothing else). As a result, a true result
12922 // indicates that it is safe to create a new consecutive load adjacent to the
12923 // load provided.
12924 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12925   SDValue Chain = LD->getChain();
12926   EVT VT = LD->getMemoryVT();
12927 
12928   SmallSet<SDNode *, 16> LoadRoots;
12929   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12930   SmallSet<SDNode *, 16> Visited;
12931 
12932   // First, search up the chain, branching to follow all token-factor operands.
12933   // If we find a consecutive load, then we're done, otherwise, record all
12934   // nodes just above the top-level loads and token factors.
12935   while (!Queue.empty()) {
12936     SDNode *ChainNext = Queue.pop_back_val();
12937     if (!Visited.insert(ChainNext).second)
12938       continue;
12939 
12940     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12941       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12942         return true;
12943 
12944       if (!Visited.count(ChainLD->getChain().getNode()))
12945         Queue.push_back(ChainLD->getChain().getNode());
12946     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12947       for (const SDUse &O : ChainNext->ops())
12948         if (!Visited.count(O.getNode()))
12949           Queue.push_back(O.getNode());
12950     } else
12951       LoadRoots.insert(ChainNext);
12952   }
12953 
12954   // Second, search down the chain, starting from the top-level nodes recorded
12955   // in the first phase. These top-level nodes are the nodes just above all
12956   // loads and token factors. Starting with their uses, recursively look though
12957   // all loads (just the chain uses) and token factors to find a consecutive
12958   // load.
12959   Visited.clear();
12960   Queue.clear();
12961 
12962   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12963        IE = LoadRoots.end(); I != IE; ++I) {
12964     Queue.push_back(*I);
12965 
12966     while (!Queue.empty()) {
12967       SDNode *LoadRoot = Queue.pop_back_val();
12968       if (!Visited.insert(LoadRoot).second)
12969         continue;
12970 
12971       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12972         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12973           return true;
12974 
12975       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12976            UE = LoadRoot->use_end(); UI != UE; ++UI)
12977         if (((isa<MemSDNode>(*UI) &&
12978             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12979             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12980           Queue.push_back(*UI);
12981     }
12982   }
12983 
12984   return false;
12985 }
12986 
12987 /// This function is called when we have proved that a SETCC node can be replaced
12988 /// by subtraction (and other supporting instructions) so that the result of
12989 /// comparison is kept in a GPR instead of CR. This function is purely for
12990 /// codegen purposes and has some flags to guide the codegen process.
12991 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12992                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12993   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12994 
12995   // Zero extend the operands to the largest legal integer. Originally, they
12996   // must be of a strictly smaller size.
12997   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12998                          DAG.getConstant(Size, DL, MVT::i32));
12999   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13000                          DAG.getConstant(Size, DL, MVT::i32));
13001 
13002   // Swap if needed. Depends on the condition code.
13003   if (Swap)
13004     std::swap(Op0, Op1);
13005 
13006   // Subtract extended integers.
13007   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13008 
13009   // Move the sign bit to the least significant position and zero out the rest.
13010   // Now the least significant bit carries the result of original comparison.
13011   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13012                              DAG.getConstant(Size - 1, DL, MVT::i32));
13013   auto Final = Shifted;
13014 
13015   // Complement the result if needed. Based on the condition code.
13016   if (Complement)
13017     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13018                         DAG.getConstant(1, DL, MVT::i64));
13019 
13020   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13021 }
13022 
13023 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13024                                                   DAGCombinerInfo &DCI) const {
13025   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13026 
13027   SelectionDAG &DAG = DCI.DAG;
13028   SDLoc DL(N);
13029 
13030   // Size of integers being compared has a critical role in the following
13031   // analysis, so we prefer to do this when all types are legal.
13032   if (!DCI.isAfterLegalizeDAG())
13033     return SDValue();
13034 
13035   // If all users of SETCC extend its value to a legal integer type
13036   // then we replace SETCC with a subtraction
13037   for (SDNode::use_iterator UI = N->use_begin(),
13038        UE = N->use_end(); UI != UE; ++UI) {
13039     if (UI->getOpcode() != ISD::ZERO_EXTEND)
13040       return SDValue();
13041   }
13042 
13043   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13044   auto OpSize = N->getOperand(0).getValueSizeInBits();
13045 
13046   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13047 
13048   if (OpSize < Size) {
13049     switch (CC) {
13050     default: break;
13051     case ISD::SETULT:
13052       return generateEquivalentSub(N, Size, false, false, DL, DAG);
13053     case ISD::SETULE:
13054       return generateEquivalentSub(N, Size, true, true, DL, DAG);
13055     case ISD::SETUGT:
13056       return generateEquivalentSub(N, Size, false, true, DL, DAG);
13057     case ISD::SETUGE:
13058       return generateEquivalentSub(N, Size, true, false, DL, DAG);
13059     }
13060   }
13061 
13062   return SDValue();
13063 }
13064 
13065 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13066                                                   DAGCombinerInfo &DCI) const {
13067   SelectionDAG &DAG = DCI.DAG;
13068   SDLoc dl(N);
13069 
13070   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13071   // If we're tracking CR bits, we need to be careful that we don't have:
13072   //   trunc(binary-ops(zext(x), zext(y)))
13073   // or
13074   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13075   // such that we're unnecessarily moving things into GPRs when it would be
13076   // better to keep them in CR bits.
13077 
13078   // Note that trunc here can be an actual i1 trunc, or can be the effective
13079   // truncation that comes from a setcc or select_cc.
13080   if (N->getOpcode() == ISD::TRUNCATE &&
13081       N->getValueType(0) != MVT::i1)
13082     return SDValue();
13083 
13084   if (N->getOperand(0).getValueType() != MVT::i32 &&
13085       N->getOperand(0).getValueType() != MVT::i64)
13086     return SDValue();
13087 
13088   if (N->getOpcode() == ISD::SETCC ||
13089       N->getOpcode() == ISD::SELECT_CC) {
13090     // If we're looking at a comparison, then we need to make sure that the
13091     // high bits (all except for the first) don't matter the result.
13092     ISD::CondCode CC =
13093       cast<CondCodeSDNode>(N->getOperand(
13094         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13095     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13096 
13097     if (ISD::isSignedIntSetCC(CC)) {
13098       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13099           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13100         return SDValue();
13101     } else if (ISD::isUnsignedIntSetCC(CC)) {
13102       if (!DAG.MaskedValueIsZero(N->getOperand(0),
13103                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13104           !DAG.MaskedValueIsZero(N->getOperand(1),
13105                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
13106         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13107                                              : SDValue());
13108     } else {
13109       // This is neither a signed nor an unsigned comparison, just make sure
13110       // that the high bits are equal.
13111       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13112       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13113 
13114       // We don't really care about what is known about the first bit (if
13115       // anything), so clear it in all masks prior to comparing them.
13116       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13117       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13118 
13119       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13120         return SDValue();
13121     }
13122   }
13123 
13124   // We now know that the higher-order bits are irrelevant, we just need to
13125   // make sure that all of the intermediate operations are bit operations, and
13126   // all inputs are extensions.
13127   if (N->getOperand(0).getOpcode() != ISD::AND &&
13128       N->getOperand(0).getOpcode() != ISD::OR  &&
13129       N->getOperand(0).getOpcode() != ISD::XOR &&
13130       N->getOperand(0).getOpcode() != ISD::SELECT &&
13131       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13132       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13133       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13134       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13135       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13136     return SDValue();
13137 
13138   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13139       N->getOperand(1).getOpcode() != ISD::AND &&
13140       N->getOperand(1).getOpcode() != ISD::OR  &&
13141       N->getOperand(1).getOpcode() != ISD::XOR &&
13142       N->getOperand(1).getOpcode() != ISD::SELECT &&
13143       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13144       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13145       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13146       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13147       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13148     return SDValue();
13149 
13150   SmallVector<SDValue, 4> Inputs;
13151   SmallVector<SDValue, 8> BinOps, PromOps;
13152   SmallPtrSet<SDNode *, 16> Visited;
13153 
13154   for (unsigned i = 0; i < 2; ++i) {
13155     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13156           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13157           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13158           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13159         isa<ConstantSDNode>(N->getOperand(i)))
13160       Inputs.push_back(N->getOperand(i));
13161     else
13162       BinOps.push_back(N->getOperand(i));
13163 
13164     if (N->getOpcode() == ISD::TRUNCATE)
13165       break;
13166   }
13167 
13168   // Visit all inputs, collect all binary operations (and, or, xor and
13169   // select) that are all fed by extensions.
13170   while (!BinOps.empty()) {
13171     SDValue BinOp = BinOps.back();
13172     BinOps.pop_back();
13173 
13174     if (!Visited.insert(BinOp.getNode()).second)
13175       continue;
13176 
13177     PromOps.push_back(BinOp);
13178 
13179     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13180       // The condition of the select is not promoted.
13181       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13182         continue;
13183       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13184         continue;
13185 
13186       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13187             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13188             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13189            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13190           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13191         Inputs.push_back(BinOp.getOperand(i));
13192       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13193                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13194                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13195                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13196                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13197                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13198                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13199                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13200                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13201         BinOps.push_back(BinOp.getOperand(i));
13202       } else {
13203         // We have an input that is not an extension or another binary
13204         // operation; we'll abort this transformation.
13205         return SDValue();
13206       }
13207     }
13208   }
13209 
13210   // Make sure that this is a self-contained cluster of operations (which
13211   // is not quite the same thing as saying that everything has only one
13212   // use).
13213   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13214     if (isa<ConstantSDNode>(Inputs[i]))
13215       continue;
13216 
13217     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13218                               UE = Inputs[i].getNode()->use_end();
13219          UI != UE; ++UI) {
13220       SDNode *User = *UI;
13221       if (User != N && !Visited.count(User))
13222         return SDValue();
13223 
13224       // Make sure that we're not going to promote the non-output-value
13225       // operand(s) or SELECT or SELECT_CC.
13226       // FIXME: Although we could sometimes handle this, and it does occur in
13227       // practice that one of the condition inputs to the select is also one of
13228       // the outputs, we currently can't deal with this.
13229       if (User->getOpcode() == ISD::SELECT) {
13230         if (User->getOperand(0) == Inputs[i])
13231           return SDValue();
13232       } else if (User->getOpcode() == ISD::SELECT_CC) {
13233         if (User->getOperand(0) == Inputs[i] ||
13234             User->getOperand(1) == Inputs[i])
13235           return SDValue();
13236       }
13237     }
13238   }
13239 
13240   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13241     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13242                               UE = PromOps[i].getNode()->use_end();
13243          UI != UE; ++UI) {
13244       SDNode *User = *UI;
13245       if (User != N && !Visited.count(User))
13246         return SDValue();
13247 
13248       // Make sure that we're not going to promote the non-output-value
13249       // operand(s) or SELECT or SELECT_CC.
13250       // FIXME: Although we could sometimes handle this, and it does occur in
13251       // practice that one of the condition inputs to the select is also one of
13252       // the outputs, we currently can't deal with this.
13253       if (User->getOpcode() == ISD::SELECT) {
13254         if (User->getOperand(0) == PromOps[i])
13255           return SDValue();
13256       } else if (User->getOpcode() == ISD::SELECT_CC) {
13257         if (User->getOperand(0) == PromOps[i] ||
13258             User->getOperand(1) == PromOps[i])
13259           return SDValue();
13260       }
13261     }
13262   }
13263 
13264   // Replace all inputs with the extension operand.
13265   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13266     // Constants may have users outside the cluster of to-be-promoted nodes,
13267     // and so we need to replace those as we do the promotions.
13268     if (isa<ConstantSDNode>(Inputs[i]))
13269       continue;
13270     else
13271       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13272   }
13273 
13274   std::list<HandleSDNode> PromOpHandles;
13275   for (auto &PromOp : PromOps)
13276     PromOpHandles.emplace_back(PromOp);
13277 
13278   // Replace all operations (these are all the same, but have a different
13279   // (i1) return type). DAG.getNode will validate that the types of
13280   // a binary operator match, so go through the list in reverse so that
13281   // we've likely promoted both operands first. Any intermediate truncations or
13282   // extensions disappear.
13283   while (!PromOpHandles.empty()) {
13284     SDValue PromOp = PromOpHandles.back().getValue();
13285     PromOpHandles.pop_back();
13286 
13287     if (PromOp.getOpcode() == ISD::TRUNCATE ||
13288         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13289         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13290         PromOp.getOpcode() == ISD::ANY_EXTEND) {
13291       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13292           PromOp.getOperand(0).getValueType() != MVT::i1) {
13293         // The operand is not yet ready (see comment below).
13294         PromOpHandles.emplace_front(PromOp);
13295         continue;
13296       }
13297 
13298       SDValue RepValue = PromOp.getOperand(0);
13299       if (isa<ConstantSDNode>(RepValue))
13300         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13301 
13302       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13303       continue;
13304     }
13305 
13306     unsigned C;
13307     switch (PromOp.getOpcode()) {
13308     default:             C = 0; break;
13309     case ISD::SELECT:    C = 1; break;
13310     case ISD::SELECT_CC: C = 2; break;
13311     }
13312 
13313     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13314          PromOp.getOperand(C).getValueType() != MVT::i1) ||
13315         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13316          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13317       // The to-be-promoted operands of this node have not yet been
13318       // promoted (this should be rare because we're going through the
13319       // list backward, but if one of the operands has several users in
13320       // this cluster of to-be-promoted nodes, it is possible).
13321       PromOpHandles.emplace_front(PromOp);
13322       continue;
13323     }
13324 
13325     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13326                                 PromOp.getNode()->op_end());
13327 
13328     // If there are any constant inputs, make sure they're replaced now.
13329     for (unsigned i = 0; i < 2; ++i)
13330       if (isa<ConstantSDNode>(Ops[C+i]))
13331         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13332 
13333     DAG.ReplaceAllUsesOfValueWith(PromOp,
13334       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13335   }
13336 
13337   // Now we're left with the initial truncation itself.
13338   if (N->getOpcode() == ISD::TRUNCATE)
13339     return N->getOperand(0);
13340 
13341   // Otherwise, this is a comparison. The operands to be compared have just
13342   // changed type (to i1), but everything else is the same.
13343   return SDValue(N, 0);
13344 }
13345 
13346 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13347                                                   DAGCombinerInfo &DCI) const {
13348   SelectionDAG &DAG = DCI.DAG;
13349   SDLoc dl(N);
13350 
13351   // If we're tracking CR bits, we need to be careful that we don't have:
13352   //   zext(binary-ops(trunc(x), trunc(y)))
13353   // or
13354   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13355   // such that we're unnecessarily moving things into CR bits that can more
13356   // efficiently stay in GPRs. Note that if we're not certain that the high
13357   // bits are set as required by the final extension, we still may need to do
13358   // some masking to get the proper behavior.
13359 
13360   // This same functionality is important on PPC64 when dealing with
13361   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13362   // the return values of functions. Because it is so similar, it is handled
13363   // here as well.
13364 
13365   if (N->getValueType(0) != MVT::i32 &&
13366       N->getValueType(0) != MVT::i64)
13367     return SDValue();
13368 
13369   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13370         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13371     return SDValue();
13372 
13373   if (N->getOperand(0).getOpcode() != ISD::AND &&
13374       N->getOperand(0).getOpcode() != ISD::OR  &&
13375       N->getOperand(0).getOpcode() != ISD::XOR &&
13376       N->getOperand(0).getOpcode() != ISD::SELECT &&
13377       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13378     return SDValue();
13379 
13380   SmallVector<SDValue, 4> Inputs;
13381   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13382   SmallPtrSet<SDNode *, 16> Visited;
13383 
13384   // Visit all inputs, collect all binary operations (and, or, xor and
13385   // select) that are all fed by truncations.
13386   while (!BinOps.empty()) {
13387     SDValue BinOp = BinOps.back();
13388     BinOps.pop_back();
13389 
13390     if (!Visited.insert(BinOp.getNode()).second)
13391       continue;
13392 
13393     PromOps.push_back(BinOp);
13394 
13395     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13396       // The condition of the select is not promoted.
13397       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13398         continue;
13399       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13400         continue;
13401 
13402       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13403           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13404         Inputs.push_back(BinOp.getOperand(i));
13405       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13406                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13407                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13408                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13409                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13410         BinOps.push_back(BinOp.getOperand(i));
13411       } else {
13412         // We have an input that is not a truncation or another binary
13413         // operation; we'll abort this transformation.
13414         return SDValue();
13415       }
13416     }
13417   }
13418 
13419   // The operands of a select that must be truncated when the select is
13420   // promoted because the operand is actually part of the to-be-promoted set.
13421   DenseMap<SDNode *, EVT> SelectTruncOp[2];
13422 
13423   // Make sure that this is a self-contained cluster of operations (which
13424   // is not quite the same thing as saying that everything has only one
13425   // use).
13426   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13427     if (isa<ConstantSDNode>(Inputs[i]))
13428       continue;
13429 
13430     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13431                               UE = Inputs[i].getNode()->use_end();
13432          UI != UE; ++UI) {
13433       SDNode *User = *UI;
13434       if (User != N && !Visited.count(User))
13435         return SDValue();
13436 
13437       // If we're going to promote the non-output-value operand(s) or SELECT or
13438       // SELECT_CC, record them for truncation.
13439       if (User->getOpcode() == ISD::SELECT) {
13440         if (User->getOperand(0) == Inputs[i])
13441           SelectTruncOp[0].insert(std::make_pair(User,
13442                                     User->getOperand(0).getValueType()));
13443       } else if (User->getOpcode() == ISD::SELECT_CC) {
13444         if (User->getOperand(0) == Inputs[i])
13445           SelectTruncOp[0].insert(std::make_pair(User,
13446                                     User->getOperand(0).getValueType()));
13447         if (User->getOperand(1) == Inputs[i])
13448           SelectTruncOp[1].insert(std::make_pair(User,
13449                                     User->getOperand(1).getValueType()));
13450       }
13451     }
13452   }
13453 
13454   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13455     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13456                               UE = PromOps[i].getNode()->use_end();
13457          UI != UE; ++UI) {
13458       SDNode *User = *UI;
13459       if (User != N && !Visited.count(User))
13460         return SDValue();
13461 
13462       // If we're going to promote the non-output-value operand(s) or SELECT or
13463       // SELECT_CC, record them for truncation.
13464       if (User->getOpcode() == ISD::SELECT) {
13465         if (User->getOperand(0) == PromOps[i])
13466           SelectTruncOp[0].insert(std::make_pair(User,
13467                                     User->getOperand(0).getValueType()));
13468       } else if (User->getOpcode() == ISD::SELECT_CC) {
13469         if (User->getOperand(0) == PromOps[i])
13470           SelectTruncOp[0].insert(std::make_pair(User,
13471                                     User->getOperand(0).getValueType()));
13472         if (User->getOperand(1) == PromOps[i])
13473           SelectTruncOp[1].insert(std::make_pair(User,
13474                                     User->getOperand(1).getValueType()));
13475       }
13476     }
13477   }
13478 
13479   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13480   bool ReallyNeedsExt = false;
13481   if (N->getOpcode() != ISD::ANY_EXTEND) {
13482     // If all of the inputs are not already sign/zero extended, then
13483     // we'll still need to do that at the end.
13484     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13485       if (isa<ConstantSDNode>(Inputs[i]))
13486         continue;
13487 
13488       unsigned OpBits =
13489         Inputs[i].getOperand(0).getValueSizeInBits();
13490       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13491 
13492       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13493            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13494                                   APInt::getHighBitsSet(OpBits,
13495                                                         OpBits-PromBits))) ||
13496           (N->getOpcode() == ISD::SIGN_EXTEND &&
13497            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13498              (OpBits-(PromBits-1)))) {
13499         ReallyNeedsExt = true;
13500         break;
13501       }
13502     }
13503   }
13504 
13505   // Replace all inputs, either with the truncation operand, or a
13506   // truncation or extension to the final output type.
13507   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13508     // Constant inputs need to be replaced with the to-be-promoted nodes that
13509     // use them because they might have users outside of the cluster of
13510     // promoted nodes.
13511     if (isa<ConstantSDNode>(Inputs[i]))
13512       continue;
13513 
13514     SDValue InSrc = Inputs[i].getOperand(0);
13515     if (Inputs[i].getValueType() == N->getValueType(0))
13516       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13517     else if (N->getOpcode() == ISD::SIGN_EXTEND)
13518       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13519         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13520     else if (N->getOpcode() == ISD::ZERO_EXTEND)
13521       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13522         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13523     else
13524       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13525         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13526   }
13527 
13528   std::list<HandleSDNode> PromOpHandles;
13529   for (auto &PromOp : PromOps)
13530     PromOpHandles.emplace_back(PromOp);
13531 
13532   // Replace all operations (these are all the same, but have a different
13533   // (promoted) return type). DAG.getNode will validate that the types of
13534   // a binary operator match, so go through the list in reverse so that
13535   // we've likely promoted both operands first.
13536   while (!PromOpHandles.empty()) {
13537     SDValue PromOp = PromOpHandles.back().getValue();
13538     PromOpHandles.pop_back();
13539 
13540     unsigned C;
13541     switch (PromOp.getOpcode()) {
13542     default:             C = 0; break;
13543     case ISD::SELECT:    C = 1; break;
13544     case ISD::SELECT_CC: C = 2; break;
13545     }
13546 
13547     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13548          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13549         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13550          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13551       // The to-be-promoted operands of this node have not yet been
13552       // promoted (this should be rare because we're going through the
13553       // list backward, but if one of the operands has several users in
13554       // this cluster of to-be-promoted nodes, it is possible).
13555       PromOpHandles.emplace_front(PromOp);
13556       continue;
13557     }
13558 
13559     // For SELECT and SELECT_CC nodes, we do a similar check for any
13560     // to-be-promoted comparison inputs.
13561     if (PromOp.getOpcode() == ISD::SELECT ||
13562         PromOp.getOpcode() == ISD::SELECT_CC) {
13563       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13564            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13565           (SelectTruncOp[1].count(PromOp.getNode()) &&
13566            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13567         PromOpHandles.emplace_front(PromOp);
13568         continue;
13569       }
13570     }
13571 
13572     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13573                                 PromOp.getNode()->op_end());
13574 
13575     // If this node has constant inputs, then they'll need to be promoted here.
13576     for (unsigned i = 0; i < 2; ++i) {
13577       if (!isa<ConstantSDNode>(Ops[C+i]))
13578         continue;
13579       if (Ops[C+i].getValueType() == N->getValueType(0))
13580         continue;
13581 
13582       if (N->getOpcode() == ISD::SIGN_EXTEND)
13583         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13584       else if (N->getOpcode() == ISD::ZERO_EXTEND)
13585         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13586       else
13587         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13588     }
13589 
13590     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13591     // truncate them again to the original value type.
13592     if (PromOp.getOpcode() == ISD::SELECT ||
13593         PromOp.getOpcode() == ISD::SELECT_CC) {
13594       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13595       if (SI0 != SelectTruncOp[0].end())
13596         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13597       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13598       if (SI1 != SelectTruncOp[1].end())
13599         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13600     }
13601 
13602     DAG.ReplaceAllUsesOfValueWith(PromOp,
13603       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13604   }
13605 
13606   // Now we're left with the initial extension itself.
13607   if (!ReallyNeedsExt)
13608     return N->getOperand(0);
13609 
13610   // To zero extend, just mask off everything except for the first bit (in the
13611   // i1 case).
13612   if (N->getOpcode() == ISD::ZERO_EXTEND)
13613     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13614                        DAG.getConstant(APInt::getLowBitsSet(
13615                                          N->getValueSizeInBits(0), PromBits),
13616                                        dl, N->getValueType(0)));
13617 
13618   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13619          "Invalid extension type");
13620   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13621   SDValue ShiftCst =
13622       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13623   return DAG.getNode(
13624       ISD::SRA, dl, N->getValueType(0),
13625       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13626       ShiftCst);
13627 }
13628 
13629 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13630                                         DAGCombinerInfo &DCI) const {
13631   assert(N->getOpcode() == ISD::SETCC &&
13632          "Should be called with a SETCC node");
13633 
13634   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13635   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13636     SDValue LHS = N->getOperand(0);
13637     SDValue RHS = N->getOperand(1);
13638 
13639     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13640     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13641         LHS.hasOneUse())
13642       std::swap(LHS, RHS);
13643 
13644     // x == 0-y --> x+y == 0
13645     // x != 0-y --> x+y != 0
13646     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13647         RHS.hasOneUse()) {
13648       SDLoc DL(N);
13649       SelectionDAG &DAG = DCI.DAG;
13650       EVT VT = N->getValueType(0);
13651       EVT OpVT = LHS.getValueType();
13652       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13653       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13654     }
13655   }
13656 
13657   return DAGCombineTruncBoolExt(N, DCI);
13658 }
13659 
13660 // Is this an extending load from an f32 to an f64?
13661 static bool isFPExtLoad(SDValue Op) {
13662   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13663     return LD->getExtensionType() == ISD::EXTLOAD &&
13664       Op.getValueType() == MVT::f64;
13665   return false;
13666 }
13667 
13668 /// Reduces the number of fp-to-int conversion when building a vector.
13669 ///
13670 /// If this vector is built out of floating to integer conversions,
13671 /// transform it to a vector built out of floating point values followed by a
13672 /// single floating to integer conversion of the vector.
13673 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
13674 /// becomes (fptosi (build_vector ($A, $B, ...)))
13675 SDValue PPCTargetLowering::
13676 combineElementTruncationToVectorTruncation(SDNode *N,
13677                                            DAGCombinerInfo &DCI) const {
13678   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13679          "Should be called with a BUILD_VECTOR node");
13680 
13681   SelectionDAG &DAG = DCI.DAG;
13682   SDLoc dl(N);
13683 
13684   SDValue FirstInput = N->getOperand(0);
13685   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13686          "The input operand must be an fp-to-int conversion.");
13687 
13688   // This combine happens after legalization so the fp_to_[su]i nodes are
13689   // already converted to PPCSISD nodes.
13690   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13691   if (FirstConversion == PPCISD::FCTIDZ ||
13692       FirstConversion == PPCISD::FCTIDUZ ||
13693       FirstConversion == PPCISD::FCTIWZ ||
13694       FirstConversion == PPCISD::FCTIWUZ) {
13695     bool IsSplat = true;
13696     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13697       FirstConversion == PPCISD::FCTIWUZ;
13698     EVT SrcVT = FirstInput.getOperand(0).getValueType();
13699     SmallVector<SDValue, 4> Ops;
13700     EVT TargetVT = N->getValueType(0);
13701     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13702       SDValue NextOp = N->getOperand(i);
13703       if (NextOp.getOpcode() != PPCISD::MFVSR)
13704         return SDValue();
13705       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13706       if (NextConversion != FirstConversion)
13707         return SDValue();
13708       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13709       // This is not valid if the input was originally double precision. It is
13710       // also not profitable to do unless this is an extending load in which
13711       // case doing this combine will allow us to combine consecutive loads.
13712       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13713         return SDValue();
13714       if (N->getOperand(i) != FirstInput)
13715         IsSplat = false;
13716     }
13717 
13718     // If this is a splat, we leave it as-is since there will be only a single
13719     // fp-to-int conversion followed by a splat of the integer. This is better
13720     // for 32-bit and smaller ints and neutral for 64-bit ints.
13721     if (IsSplat)
13722       return SDValue();
13723 
13724     // Now that we know we have the right type of node, get its operands
13725     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13726       SDValue In = N->getOperand(i).getOperand(0);
13727       if (Is32Bit) {
13728         // For 32-bit values, we need to add an FP_ROUND node (if we made it
13729         // here, we know that all inputs are extending loads so this is safe).
13730         if (In.isUndef())
13731           Ops.push_back(DAG.getUNDEF(SrcVT));
13732         else {
13733           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13734                                       MVT::f32, In.getOperand(0),
13735                                       DAG.getIntPtrConstant(1, dl));
13736           Ops.push_back(Trunc);
13737         }
13738       } else
13739         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13740     }
13741 
13742     unsigned Opcode;
13743     if (FirstConversion == PPCISD::FCTIDZ ||
13744         FirstConversion == PPCISD::FCTIWZ)
13745       Opcode = ISD::FP_TO_SINT;
13746     else
13747       Opcode = ISD::FP_TO_UINT;
13748 
13749     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13750     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13751     return DAG.getNode(Opcode, dl, TargetVT, BV);
13752   }
13753   return SDValue();
13754 }
13755 
13756 /// Reduce the number of loads when building a vector.
13757 ///
13758 /// Building a vector out of multiple loads can be converted to a load
13759 /// of the vector type if the loads are consecutive. If the loads are
13760 /// consecutive but in descending order, a shuffle is added at the end
13761 /// to reorder the vector.
13762 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13763   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13764          "Should be called with a BUILD_VECTOR node");
13765 
13766   SDLoc dl(N);
13767 
13768   // Return early for non byte-sized type, as they can't be consecutive.
13769   if (!N->getValueType(0).getVectorElementType().isByteSized())
13770     return SDValue();
13771 
13772   bool InputsAreConsecutiveLoads = true;
13773   bool InputsAreReverseConsecutive = true;
13774   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13775   SDValue FirstInput = N->getOperand(0);
13776   bool IsRoundOfExtLoad = false;
13777 
13778   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13779       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13780     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13781     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13782   }
13783   // Not a build vector of (possibly fp_rounded) loads.
13784   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13785       N->getNumOperands() == 1)
13786     return SDValue();
13787 
13788   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13789     // If any inputs are fp_round(extload), they all must be.
13790     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13791       return SDValue();
13792 
13793     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13794       N->getOperand(i);
13795     if (NextInput.getOpcode() != ISD::LOAD)
13796       return SDValue();
13797 
13798     SDValue PreviousInput =
13799       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13800     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13801     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13802 
13803     // If any inputs are fp_round(extload), they all must be.
13804     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13805       return SDValue();
13806 
13807     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13808       InputsAreConsecutiveLoads = false;
13809     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13810       InputsAreReverseConsecutive = false;
13811 
13812     // Exit early if the loads are neither consecutive nor reverse consecutive.
13813     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13814       return SDValue();
13815   }
13816 
13817   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13818          "The loads cannot be both consecutive and reverse consecutive.");
13819 
13820   SDValue FirstLoadOp =
13821     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13822   SDValue LastLoadOp =
13823     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13824                        N->getOperand(N->getNumOperands()-1);
13825 
13826   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13827   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13828   if (InputsAreConsecutiveLoads) {
13829     assert(LD1 && "Input needs to be a LoadSDNode.");
13830     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13831                        LD1->getBasePtr(), LD1->getPointerInfo(),
13832                        LD1->getAlignment());
13833   }
13834   if (InputsAreReverseConsecutive) {
13835     assert(LDL && "Input needs to be a LoadSDNode.");
13836     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13837                                LDL->getBasePtr(), LDL->getPointerInfo(),
13838                                LDL->getAlignment());
13839     SmallVector<int, 16> Ops;
13840     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13841       Ops.push_back(i);
13842 
13843     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13844                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13845   }
13846   return SDValue();
13847 }
13848 
13849 // This function adds the required vector_shuffle needed to get
13850 // the elements of the vector extract in the correct position
13851 // as specified by the CorrectElems encoding.
13852 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13853                                       SDValue Input, uint64_t Elems,
13854                                       uint64_t CorrectElems) {
13855   SDLoc dl(N);
13856 
13857   unsigned NumElems = Input.getValueType().getVectorNumElements();
13858   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13859 
13860   // Knowing the element indices being extracted from the original
13861   // vector and the order in which they're being inserted, just put
13862   // them at element indices required for the instruction.
13863   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13864     if (DAG.getDataLayout().isLittleEndian())
13865       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13866     else
13867       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13868     CorrectElems = CorrectElems >> 8;
13869     Elems = Elems >> 8;
13870   }
13871 
13872   SDValue Shuffle =
13873       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13874                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13875 
13876   EVT VT = N->getValueType(0);
13877   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13878 
13879   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13880                                Input.getValueType().getVectorElementType(),
13881                                VT.getVectorNumElements());
13882   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13883                      DAG.getValueType(ExtVT));
13884 }
13885 
13886 // Look for build vector patterns where input operands come from sign
13887 // extended vector_extract elements of specific indices. If the correct indices
13888 // aren't used, add a vector shuffle to fix up the indices and create
13889 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13890 // during instruction selection.
13891 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13892   // This array encodes the indices that the vector sign extend instructions
13893   // extract from when extending from one type to another for both BE and LE.
13894   // The right nibble of each byte corresponds to the LE incides.
13895   // and the left nibble of each byte corresponds to the BE incides.
13896   // For example: 0x3074B8FC  byte->word
13897   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13898   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13899   // For example: 0x000070F8  byte->double word
13900   // For LE: the allowed indices are: 0x0,0x8
13901   // For BE: the allowed indices are: 0x7,0xF
13902   uint64_t TargetElems[] = {
13903       0x3074B8FC, // b->w
13904       0x000070F8, // b->d
13905       0x10325476, // h->w
13906       0x00003074, // h->d
13907       0x00001032, // w->d
13908   };
13909 
13910   uint64_t Elems = 0;
13911   int Index;
13912   SDValue Input;
13913 
13914   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13915     if (!Op)
13916       return false;
13917     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13918         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13919       return false;
13920 
13921     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13922     // of the right width.
13923     SDValue Extract = Op.getOperand(0);
13924     if (Extract.getOpcode() == ISD::ANY_EXTEND)
13925       Extract = Extract.getOperand(0);
13926     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13927       return false;
13928 
13929     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13930     if (!ExtOp)
13931       return false;
13932 
13933     Index = ExtOp->getZExtValue();
13934     if (Input && Input != Extract.getOperand(0))
13935       return false;
13936 
13937     if (!Input)
13938       Input = Extract.getOperand(0);
13939 
13940     Elems = Elems << 8;
13941     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13942     Elems |= Index;
13943 
13944     return true;
13945   };
13946 
13947   // If the build vector operands aren't sign extended vector extracts,
13948   // of the same input vector, then return.
13949   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13950     if (!isSExtOfVecExtract(N->getOperand(i))) {
13951       return SDValue();
13952     }
13953   }
13954 
13955   // If the vector extract indicies are not correct, add the appropriate
13956   // vector_shuffle.
13957   int TgtElemArrayIdx;
13958   int InputSize = Input.getValueType().getScalarSizeInBits();
13959   int OutputSize = N->getValueType(0).getScalarSizeInBits();
13960   if (InputSize + OutputSize == 40)
13961     TgtElemArrayIdx = 0;
13962   else if (InputSize + OutputSize == 72)
13963     TgtElemArrayIdx = 1;
13964   else if (InputSize + OutputSize == 48)
13965     TgtElemArrayIdx = 2;
13966   else if (InputSize + OutputSize == 80)
13967     TgtElemArrayIdx = 3;
13968   else if (InputSize + OutputSize == 96)
13969     TgtElemArrayIdx = 4;
13970   else
13971     return SDValue();
13972 
13973   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13974   CorrectElems = DAG.getDataLayout().isLittleEndian()
13975                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13976                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13977   if (Elems != CorrectElems) {
13978     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13979   }
13980 
13981   // Regular lowering will catch cases where a shuffle is not needed.
13982   return SDValue();
13983 }
13984 
13985 // Look for the pattern of a load from a narrow width to i128, feeding
13986 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13987 // (LXVRZX). This node represents a zero extending load that will be matched
13988 // to the Load VSX Vector Rightmost instructions.
13989 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13990   SDLoc DL(N);
13991 
13992   // This combine is only eligible for a BUILD_VECTOR of v1i128.
13993   if (N->getValueType(0) != MVT::v1i128)
13994     return SDValue();
13995 
13996   SDValue Operand = N->getOperand(0);
13997   // Proceed with the transformation if the operand to the BUILD_VECTOR
13998   // is a load instruction.
13999   if (Operand.getOpcode() != ISD::LOAD)
14000     return SDValue();
14001 
14002   LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
14003   EVT MemoryType = LD->getMemoryVT();
14004 
14005   // This transformation is only valid if the we are loading either a byte,
14006   // halfword, word, or doubleword.
14007   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14008                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
14009 
14010   // Ensure that the load from the narrow width is being zero extended to i128.
14011   if (!ValidLDType ||
14012       (LD->getExtensionType() != ISD::ZEXTLOAD &&
14013        LD->getExtensionType() != ISD::EXTLOAD))
14014     return SDValue();
14015 
14016   SDValue LoadOps[] = {
14017       LD->getChain(), LD->getBasePtr(),
14018       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14019 
14020   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14021                                  DAG.getVTList(MVT::v1i128, MVT::Other),
14022                                  LoadOps, MemoryType, LD->getMemOperand());
14023 }
14024 
14025 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14026                                                  DAGCombinerInfo &DCI) const {
14027   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14028          "Should be called with a BUILD_VECTOR node");
14029 
14030   SelectionDAG &DAG = DCI.DAG;
14031   SDLoc dl(N);
14032 
14033   if (!Subtarget.hasVSX())
14034     return SDValue();
14035 
14036   // The target independent DAG combiner will leave a build_vector of
14037   // float-to-int conversions intact. We can generate MUCH better code for
14038   // a float-to-int conversion of a vector of floats.
14039   SDValue FirstInput = N->getOperand(0);
14040   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14041     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14042     if (Reduced)
14043       return Reduced;
14044   }
14045 
14046   // If we're building a vector out of consecutive loads, just load that
14047   // vector type.
14048   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14049   if (Reduced)
14050     return Reduced;
14051 
14052   // If we're building a vector out of extended elements from another vector
14053   // we have P9 vector integer extend instructions. The code assumes legal
14054   // input types (i.e. it can't handle things like v4i16) so do not run before
14055   // legalization.
14056   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14057     Reduced = combineBVOfVecSExt(N, DAG);
14058     if (Reduced)
14059       return Reduced;
14060   }
14061 
14062   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14063   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14064   // is a load from <valid narrow width> to i128.
14065   if (Subtarget.isISA3_1()) {
14066     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14067     if (BVOfZLoad)
14068       return BVOfZLoad;
14069   }
14070 
14071   if (N->getValueType(0) != MVT::v2f64)
14072     return SDValue();
14073 
14074   // Looking for:
14075   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14076   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14077       FirstInput.getOpcode() != ISD::UINT_TO_FP)
14078     return SDValue();
14079   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14080       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14081     return SDValue();
14082   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14083     return SDValue();
14084 
14085   SDValue Ext1 = FirstInput.getOperand(0);
14086   SDValue Ext2 = N->getOperand(1).getOperand(0);
14087   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14088      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14089     return SDValue();
14090 
14091   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14092   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14093   if (!Ext1Op || !Ext2Op)
14094     return SDValue();
14095   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14096       Ext1.getOperand(0) != Ext2.getOperand(0))
14097     return SDValue();
14098 
14099   int FirstElem = Ext1Op->getZExtValue();
14100   int SecondElem = Ext2Op->getZExtValue();
14101   int SubvecIdx;
14102   if (FirstElem == 0 && SecondElem == 1)
14103     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14104   else if (FirstElem == 2 && SecondElem == 3)
14105     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14106   else
14107     return SDValue();
14108 
14109   SDValue SrcVec = Ext1.getOperand(0);
14110   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14111     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14112   return DAG.getNode(NodeType, dl, MVT::v2f64,
14113                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14114 }
14115 
14116 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14117                                               DAGCombinerInfo &DCI) const {
14118   assert((N->getOpcode() == ISD::SINT_TO_FP ||
14119           N->getOpcode() == ISD::UINT_TO_FP) &&
14120          "Need an int -> FP conversion node here");
14121 
14122   if (useSoftFloat() || !Subtarget.has64BitSupport())
14123     return SDValue();
14124 
14125   SelectionDAG &DAG = DCI.DAG;
14126   SDLoc dl(N);
14127   SDValue Op(N, 0);
14128 
14129   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14130   // from the hardware.
14131   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14132     return SDValue();
14133   if (!Op.getOperand(0).getValueType().isSimple())
14134     return SDValue();
14135   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14136       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14137     return SDValue();
14138 
14139   SDValue FirstOperand(Op.getOperand(0));
14140   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14141     (FirstOperand.getValueType() == MVT::i8 ||
14142      FirstOperand.getValueType() == MVT::i16);
14143   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14144     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14145     bool DstDouble = Op.getValueType() == MVT::f64;
14146     unsigned ConvOp = Signed ?
14147       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
14148       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14149     SDValue WidthConst =
14150       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14151                             dl, false);
14152     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14153     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14154     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14155                                          DAG.getVTList(MVT::f64, MVT::Other),
14156                                          Ops, MVT::i8, LDN->getMemOperand());
14157 
14158     // For signed conversion, we need to sign-extend the value in the VSR
14159     if (Signed) {
14160       SDValue ExtOps[] = { Ld, WidthConst };
14161       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14162       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14163     } else
14164       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14165   }
14166 
14167 
14168   // For i32 intermediate values, unfortunately, the conversion functions
14169   // leave the upper 32 bits of the value are undefined. Within the set of
14170   // scalar instructions, we have no method for zero- or sign-extending the
14171   // value. Thus, we cannot handle i32 intermediate values here.
14172   if (Op.getOperand(0).getValueType() == MVT::i32)
14173     return SDValue();
14174 
14175   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14176          "UINT_TO_FP is supported only with FPCVT");
14177 
14178   // If we have FCFIDS, then use it when converting to single-precision.
14179   // Otherwise, convert to double-precision and then round.
14180   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14181                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14182                                                             : PPCISD::FCFIDS)
14183                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14184                                                             : PPCISD::FCFID);
14185   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14186                   ? MVT::f32
14187                   : MVT::f64;
14188 
14189   // If we're converting from a float, to an int, and back to a float again,
14190   // then we don't need the store/load pair at all.
14191   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14192        Subtarget.hasFPCVT()) ||
14193       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14194     SDValue Src = Op.getOperand(0).getOperand(0);
14195     if (Src.getValueType() == MVT::f32) {
14196       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14197       DCI.AddToWorklist(Src.getNode());
14198     } else if (Src.getValueType() != MVT::f64) {
14199       // Make sure that we don't pick up a ppc_fp128 source value.
14200       return SDValue();
14201     }
14202 
14203     unsigned FCTOp =
14204       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14205                                                         PPCISD::FCTIDUZ;
14206 
14207     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14208     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14209 
14210     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14211       FP = DAG.getNode(ISD::FP_ROUND, dl,
14212                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14213       DCI.AddToWorklist(FP.getNode());
14214     }
14215 
14216     return FP;
14217   }
14218 
14219   return SDValue();
14220 }
14221 
14222 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14223 // builtins) into loads with swaps.
14224 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14225                                               DAGCombinerInfo &DCI) const {
14226   SelectionDAG &DAG = DCI.DAG;
14227   SDLoc dl(N);
14228   SDValue Chain;
14229   SDValue Base;
14230   MachineMemOperand *MMO;
14231 
14232   switch (N->getOpcode()) {
14233   default:
14234     llvm_unreachable("Unexpected opcode for little endian VSX load");
14235   case ISD::LOAD: {
14236     LoadSDNode *LD = cast<LoadSDNode>(N);
14237     Chain = LD->getChain();
14238     Base = LD->getBasePtr();
14239     MMO = LD->getMemOperand();
14240     // If the MMO suggests this isn't a load of a full vector, leave
14241     // things alone.  For a built-in, we have to make the change for
14242     // correctness, so if there is a size problem that will be a bug.
14243     if (MMO->getSize() < 16)
14244       return SDValue();
14245     break;
14246   }
14247   case ISD::INTRINSIC_W_CHAIN: {
14248     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14249     Chain = Intrin->getChain();
14250     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14251     // us what we want. Get operand 2 instead.
14252     Base = Intrin->getOperand(2);
14253     MMO = Intrin->getMemOperand();
14254     break;
14255   }
14256   }
14257 
14258   MVT VecTy = N->getValueType(0).getSimpleVT();
14259 
14260   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14261   // aligned and the type is a vector with elements up to 4 bytes
14262   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14263       VecTy.getScalarSizeInBits() <= 32) {
14264     return SDValue();
14265   }
14266 
14267   SDValue LoadOps[] = { Chain, Base };
14268   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14269                                          DAG.getVTList(MVT::v2f64, MVT::Other),
14270                                          LoadOps, MVT::v2f64, MMO);
14271 
14272   DCI.AddToWorklist(Load.getNode());
14273   Chain = Load.getValue(1);
14274   SDValue Swap = DAG.getNode(
14275       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14276   DCI.AddToWorklist(Swap.getNode());
14277 
14278   // Add a bitcast if the resulting load type doesn't match v2f64.
14279   if (VecTy != MVT::v2f64) {
14280     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14281     DCI.AddToWorklist(N.getNode());
14282     // Package {bitcast value, swap's chain} to match Load's shape.
14283     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14284                        N, Swap.getValue(1));
14285   }
14286 
14287   return Swap;
14288 }
14289 
14290 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14291 // builtins) into stores with swaps.
14292 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14293                                                DAGCombinerInfo &DCI) const {
14294   SelectionDAG &DAG = DCI.DAG;
14295   SDLoc dl(N);
14296   SDValue Chain;
14297   SDValue Base;
14298   unsigned SrcOpnd;
14299   MachineMemOperand *MMO;
14300 
14301   switch (N->getOpcode()) {
14302   default:
14303     llvm_unreachable("Unexpected opcode for little endian VSX store");
14304   case ISD::STORE: {
14305     StoreSDNode *ST = cast<StoreSDNode>(N);
14306     Chain = ST->getChain();
14307     Base = ST->getBasePtr();
14308     MMO = ST->getMemOperand();
14309     SrcOpnd = 1;
14310     // If the MMO suggests this isn't a store of a full vector, leave
14311     // things alone.  For a built-in, we have to make the change for
14312     // correctness, so if there is a size problem that will be a bug.
14313     if (MMO->getSize() < 16)
14314       return SDValue();
14315     break;
14316   }
14317   case ISD::INTRINSIC_VOID: {
14318     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14319     Chain = Intrin->getChain();
14320     // Intrin->getBasePtr() oddly does not get what we want.
14321     Base = Intrin->getOperand(3);
14322     MMO = Intrin->getMemOperand();
14323     SrcOpnd = 2;
14324     break;
14325   }
14326   }
14327 
14328   SDValue Src = N->getOperand(SrcOpnd);
14329   MVT VecTy = Src.getValueType().getSimpleVT();
14330 
14331   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
14332   // aligned and the type is a vector with elements up to 4 bytes
14333   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14334       VecTy.getScalarSizeInBits() <= 32) {
14335     return SDValue();
14336   }
14337 
14338   // All stores are done as v2f64 and possible bit cast.
14339   if (VecTy != MVT::v2f64) {
14340     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14341     DCI.AddToWorklist(Src.getNode());
14342   }
14343 
14344   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14345                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14346   DCI.AddToWorklist(Swap.getNode());
14347   Chain = Swap.getValue(1);
14348   SDValue StoreOps[] = { Chain, Swap, Base };
14349   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14350                                           DAG.getVTList(MVT::Other),
14351                                           StoreOps, VecTy, MMO);
14352   DCI.AddToWorklist(Store.getNode());
14353   return Store;
14354 }
14355 
14356 // Handle DAG combine for STORE (FP_TO_INT F).
14357 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14358                                                DAGCombinerInfo &DCI) const {
14359 
14360   SelectionDAG &DAG = DCI.DAG;
14361   SDLoc dl(N);
14362   unsigned Opcode = N->getOperand(1).getOpcode();
14363 
14364   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14365          && "Not a FP_TO_INT Instruction!");
14366 
14367   SDValue Val = N->getOperand(1).getOperand(0);
14368   EVT Op1VT = N->getOperand(1).getValueType();
14369   EVT ResVT = Val.getValueType();
14370 
14371   if (!isTypeLegal(ResVT))
14372     return SDValue();
14373 
14374   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14375   bool ValidTypeForStoreFltAsInt =
14376         (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14377          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14378 
14379   if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14380       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14381     return SDValue();
14382 
14383   // Extend f32 values to f64
14384   if (ResVT.getScalarSizeInBits() == 32) {
14385     Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14386     DCI.AddToWorklist(Val.getNode());
14387   }
14388 
14389   // Set signed or unsigned conversion opcode.
14390   unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14391                           PPCISD::FP_TO_SINT_IN_VSR :
14392                           PPCISD::FP_TO_UINT_IN_VSR;
14393 
14394   Val = DAG.getNode(ConvOpcode,
14395                     dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14396   DCI.AddToWorklist(Val.getNode());
14397 
14398   // Set number of bytes being converted.
14399   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14400   SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14401                     DAG.getIntPtrConstant(ByteSize, dl, false),
14402                     DAG.getValueType(Op1VT) };
14403 
14404   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14405           DAG.getVTList(MVT::Other), Ops,
14406           cast<StoreSDNode>(N)->getMemoryVT(),
14407           cast<StoreSDNode>(N)->getMemOperand());
14408 
14409   DCI.AddToWorklist(Val.getNode());
14410   return Val;
14411 }
14412 
14413 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14414   // Check that the source of the element keeps flipping
14415   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14416   bool PrevElemFromFirstVec = Mask[0] < NumElts;
14417   for (int i = 1, e = Mask.size(); i < e; i++) {
14418     if (PrevElemFromFirstVec && Mask[i] < NumElts)
14419       return false;
14420     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14421       return false;
14422     PrevElemFromFirstVec = !PrevElemFromFirstVec;
14423   }
14424   return true;
14425 }
14426 
14427 static bool isSplatBV(SDValue Op) {
14428   if (Op.getOpcode() != ISD::BUILD_VECTOR)
14429     return false;
14430   SDValue FirstOp;
14431 
14432   // Find first non-undef input.
14433   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14434     FirstOp = Op.getOperand(i);
14435     if (!FirstOp.isUndef())
14436       break;
14437   }
14438 
14439   // All inputs are undef or the same as the first non-undef input.
14440   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14441     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14442       return false;
14443   return true;
14444 }
14445 
14446 static SDValue isScalarToVec(SDValue Op) {
14447   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14448     return Op;
14449   if (Op.getOpcode() != ISD::BITCAST)
14450     return SDValue();
14451   Op = Op.getOperand(0);
14452   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14453     return Op;
14454   return SDValue();
14455 }
14456 
14457 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14458                                             int LHSMaxIdx, int RHSMinIdx,
14459                                             int RHSMaxIdx, int HalfVec) {
14460   for (int i = 0, e = ShuffV.size(); i < e; i++) {
14461     int Idx = ShuffV[i];
14462     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14463       ShuffV[i] += HalfVec;
14464   }
14465   return;
14466 }
14467 
14468 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14469 // the original is:
14470 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14471 // In such a case, just change the shuffle mask to extract the element
14472 // from the permuted index.
14473 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14474   SDLoc dl(OrigSToV);
14475   EVT VT = OrigSToV.getValueType();
14476   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14477          "Expecting a SCALAR_TO_VECTOR here");
14478   SDValue Input = OrigSToV.getOperand(0);
14479 
14480   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14481     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14482     SDValue OrigVector = Input.getOperand(0);
14483 
14484     // Can't handle non-const element indices or different vector types
14485     // for the input to the extract and the output of the scalar_to_vector.
14486     if (Idx && VT == OrigVector.getValueType()) {
14487       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14488       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14489       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14490     }
14491   }
14492   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14493                      OrigSToV.getOperand(0));
14494 }
14495 
14496 // On little endian subtargets, combine shuffles such as:
14497 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14498 // into:
14499 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14500 // because the latter can be matched to a single instruction merge.
14501 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14502 // to put the value into element zero. Adjust the shuffle mask so that the
14503 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
14504 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14505                                                 SelectionDAG &DAG) const {
14506   SDValue LHS = SVN->getOperand(0);
14507   SDValue RHS = SVN->getOperand(1);
14508   auto Mask = SVN->getMask();
14509   int NumElts = LHS.getValueType().getVectorNumElements();
14510   SDValue Res(SVN, 0);
14511   SDLoc dl(SVN);
14512 
14513   // None of these combines are useful on big endian systems since the ISA
14514   // already has a big endian bias.
14515   if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14516     return Res;
14517 
14518   // If this is not a shuffle of a shuffle and the first element comes from
14519   // the second vector, canonicalize to the commuted form. This will make it
14520   // more likely to match one of the single instruction patterns.
14521   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14522       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14523     std::swap(LHS, RHS);
14524     Res = DAG.getCommutedVectorShuffle(*SVN);
14525     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14526   }
14527 
14528   // Adjust the shuffle mask if either input vector comes from a
14529   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14530   // form (to prevent the need for a swap).
14531   SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14532   SDValue SToVLHS = isScalarToVec(LHS);
14533   SDValue SToVRHS = isScalarToVec(RHS);
14534   if (SToVLHS || SToVRHS) {
14535     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14536                             : SToVRHS.getValueType().getVectorNumElements();
14537     int NumEltsOut = ShuffV.size();
14538 
14539     // Initially assume that neither input is permuted. These will be adjusted
14540     // accordingly if either input is.
14541     int LHSMaxIdx = -1;
14542     int RHSMinIdx = -1;
14543     int RHSMaxIdx = -1;
14544     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14545 
14546     // Get the permuted scalar to vector nodes for the source(s) that come from
14547     // ISD::SCALAR_TO_VECTOR.
14548     if (SToVLHS) {
14549       // Set up the values for the shuffle vector fixup.
14550       LHSMaxIdx = NumEltsOut / NumEltsIn;
14551       SToVLHS = getSToVPermuted(SToVLHS, DAG);
14552       if (SToVLHS.getValueType() != LHS.getValueType())
14553         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14554       LHS = SToVLHS;
14555     }
14556     if (SToVRHS) {
14557       RHSMinIdx = NumEltsOut;
14558       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14559       SToVRHS = getSToVPermuted(SToVRHS, DAG);
14560       if (SToVRHS.getValueType() != RHS.getValueType())
14561         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14562       RHS = SToVRHS;
14563     }
14564 
14565     // Fix up the shuffle mask to reflect where the desired element actually is.
14566     // The minimum and maximum indices that correspond to element zero for both
14567     // the LHS and RHS are computed and will control which shuffle mask entries
14568     // are to be changed. For example, if the RHS is permuted, any shuffle mask
14569     // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14570     // HalfVec to refer to the corresponding element in the permuted vector.
14571     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14572                                     HalfVec);
14573     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14574 
14575     // We may have simplified away the shuffle. We won't be able to do anything
14576     // further with it here.
14577     if (!isa<ShuffleVectorSDNode>(Res))
14578       return Res;
14579     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14580   }
14581 
14582   // The common case after we commuted the shuffle is that the RHS is a splat
14583   // and we have elements coming in from the splat at indices that are not
14584   // conducive to using a merge.
14585   // Example:
14586   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14587   if (!isSplatBV(RHS))
14588     return Res;
14589 
14590   // We are looking for a mask such that all even elements are from
14591   // one vector and all odd elements from the other.
14592   if (!isAlternatingShuffMask(Mask, NumElts))
14593     return Res;
14594 
14595   // Adjust the mask so we are pulling in the same index from the splat
14596   // as the index from the interesting vector in consecutive elements.
14597   // Example (even elements from first vector):
14598   // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14599   if (Mask[0] < NumElts)
14600     for (int i = 1, e = Mask.size(); i < e; i += 2)
14601       ShuffV[i] = (ShuffV[i - 1] + NumElts);
14602   // Example (odd elements from first vector):
14603   // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14604   else
14605     for (int i = 0, e = Mask.size(); i < e; i += 2)
14606       ShuffV[i] = (ShuffV[i + 1] + NumElts);
14607 
14608   // If the RHS has undefs, we need to remove them since we may have created
14609   // a shuffle that adds those instead of the splat value.
14610   SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14611   RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14612 
14613   Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14614   return Res;
14615 }
14616 
14617 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14618                                                 LSBaseSDNode *LSBase,
14619                                                 DAGCombinerInfo &DCI) const {
14620   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14621         "Not a reverse memop pattern!");
14622 
14623   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14624     auto Mask = SVN->getMask();
14625     int i = 0;
14626     auto I = Mask.rbegin();
14627     auto E = Mask.rend();
14628 
14629     for (; I != E; ++I) {
14630       if (*I != i)
14631         return false;
14632       i++;
14633     }
14634     return true;
14635   };
14636 
14637   SelectionDAG &DAG = DCI.DAG;
14638   EVT VT = SVN->getValueType(0);
14639 
14640   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14641     return SDValue();
14642 
14643   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14644   // See comment in PPCVSXSwapRemoval.cpp.
14645   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14646   if (!Subtarget.hasP9Vector())
14647     return SDValue();
14648 
14649   if(!IsElementReverse(SVN))
14650     return SDValue();
14651 
14652   if (LSBase->getOpcode() == ISD::LOAD) {
14653     SDLoc dl(SVN);
14654     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14655     return DAG.getMemIntrinsicNode(
14656         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14657         LSBase->getMemoryVT(), LSBase->getMemOperand());
14658   }
14659 
14660   if (LSBase->getOpcode() == ISD::STORE) {
14661     SDLoc dl(LSBase);
14662     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14663                           LSBase->getBasePtr()};
14664     return DAG.getMemIntrinsicNode(
14665         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14666         LSBase->getMemoryVT(), LSBase->getMemOperand());
14667   }
14668 
14669   llvm_unreachable("Expected a load or store node here");
14670 }
14671 
14672 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14673                                              DAGCombinerInfo &DCI) const {
14674   SelectionDAG &DAG = DCI.DAG;
14675   SDLoc dl(N);
14676   switch (N->getOpcode()) {
14677   default: break;
14678   case ISD::ADD:
14679     return combineADD(N, DCI);
14680   case ISD::SHL:
14681     return combineSHL(N, DCI);
14682   case ISD::SRA:
14683     return combineSRA(N, DCI);
14684   case ISD::SRL:
14685     return combineSRL(N, DCI);
14686   case ISD::MUL:
14687     return combineMUL(N, DCI);
14688   case ISD::FMA:
14689   case PPCISD::FNMSUB:
14690     return combineFMALike(N, DCI);
14691   case PPCISD::SHL:
14692     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14693         return N->getOperand(0);
14694     break;
14695   case PPCISD::SRL:
14696     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14697         return N->getOperand(0);
14698     break;
14699   case PPCISD::SRA:
14700     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14701       if (C->isNullValue() ||   //  0 >>s V -> 0.
14702           C->isAllOnesValue())    // -1 >>s V -> -1.
14703         return N->getOperand(0);
14704     }
14705     break;
14706   case ISD::SIGN_EXTEND:
14707   case ISD::ZERO_EXTEND:
14708   case ISD::ANY_EXTEND:
14709     return DAGCombineExtBoolTrunc(N, DCI);
14710   case ISD::TRUNCATE:
14711     return combineTRUNCATE(N, DCI);
14712   case ISD::SETCC:
14713     if (SDValue CSCC = combineSetCC(N, DCI))
14714       return CSCC;
14715     LLVM_FALLTHROUGH;
14716   case ISD::SELECT_CC:
14717     return DAGCombineTruncBoolExt(N, DCI);
14718   case ISD::SINT_TO_FP:
14719   case ISD::UINT_TO_FP:
14720     return combineFPToIntToFP(N, DCI);
14721   case ISD::VECTOR_SHUFFLE:
14722     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14723       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14724       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14725     }
14726     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14727   case ISD::STORE: {
14728 
14729     EVT Op1VT = N->getOperand(1).getValueType();
14730     unsigned Opcode = N->getOperand(1).getOpcode();
14731 
14732     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14733       SDValue Val= combineStoreFPToInt(N, DCI);
14734       if (Val)
14735         return Val;
14736     }
14737 
14738     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14739       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14740       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14741       if (Val)
14742         return Val;
14743     }
14744 
14745     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14746     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14747         N->getOperand(1).getNode()->hasOneUse() &&
14748         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14749          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14750 
14751       // STBRX can only handle simple types and it makes no sense to store less
14752       // two bytes in byte-reversed order.
14753       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14754       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14755         break;
14756 
14757       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14758       // Do an any-extend to 32-bits if this is a half-word input.
14759       if (BSwapOp.getValueType() == MVT::i16)
14760         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14761 
14762       // If the type of BSWAP operand is wider than stored memory width
14763       // it need to be shifted to the right side before STBRX.
14764       if (Op1VT.bitsGT(mVT)) {
14765         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14766         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14767                               DAG.getConstant(Shift, dl, MVT::i32));
14768         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14769         if (Op1VT == MVT::i64)
14770           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14771       }
14772 
14773       SDValue Ops[] = {
14774         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14775       };
14776       return
14777         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14778                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14779                                 cast<StoreSDNode>(N)->getMemOperand());
14780     }
14781 
14782     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14783     // So it can increase the chance of CSE constant construction.
14784     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14785         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14786       // Need to sign-extended to 64-bits to handle negative values.
14787       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14788       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14789                                     MemVT.getSizeInBits());
14790       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14791 
14792       // DAG.getTruncStore() can't be used here because it doesn't accept
14793       // the general (base + offset) addressing mode.
14794       // So we use UpdateNodeOperands and setTruncatingStore instead.
14795       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14796                              N->getOperand(3));
14797       cast<StoreSDNode>(N)->setTruncatingStore(true);
14798       return SDValue(N, 0);
14799     }
14800 
14801     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14802     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14803     if (Op1VT.isSimple()) {
14804       MVT StoreVT = Op1VT.getSimpleVT();
14805       if (Subtarget.needsSwapsForVSXMemOps() &&
14806           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14807            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14808         return expandVSXStoreForLE(N, DCI);
14809     }
14810     break;
14811   }
14812   case ISD::LOAD: {
14813     LoadSDNode *LD = cast<LoadSDNode>(N);
14814     EVT VT = LD->getValueType(0);
14815 
14816     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14817     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14818     if (VT.isSimple()) {
14819       MVT LoadVT = VT.getSimpleVT();
14820       if (Subtarget.needsSwapsForVSXMemOps() &&
14821           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14822            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14823         return expandVSXLoadForLE(N, DCI);
14824     }
14825 
14826     // We sometimes end up with a 64-bit integer load, from which we extract
14827     // two single-precision floating-point numbers. This happens with
14828     // std::complex<float>, and other similar structures, because of the way we
14829     // canonicalize structure copies. However, if we lack direct moves,
14830     // then the final bitcasts from the extracted integer values to the
14831     // floating-point numbers turn into store/load pairs. Even with direct moves,
14832     // just loading the two floating-point numbers is likely better.
14833     auto ReplaceTwoFloatLoad = [&]() {
14834       if (VT != MVT::i64)
14835         return false;
14836 
14837       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14838           LD->isVolatile())
14839         return false;
14840 
14841       //  We're looking for a sequence like this:
14842       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14843       //      t16: i64 = srl t13, Constant:i32<32>
14844       //    t17: i32 = truncate t16
14845       //  t18: f32 = bitcast t17
14846       //    t19: i32 = truncate t13
14847       //  t20: f32 = bitcast t19
14848 
14849       if (!LD->hasNUsesOfValue(2, 0))
14850         return false;
14851 
14852       auto UI = LD->use_begin();
14853       while (UI.getUse().getResNo() != 0) ++UI;
14854       SDNode *Trunc = *UI++;
14855       while (UI.getUse().getResNo() != 0) ++UI;
14856       SDNode *RightShift = *UI;
14857       if (Trunc->getOpcode() != ISD::TRUNCATE)
14858         std::swap(Trunc, RightShift);
14859 
14860       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14861           Trunc->getValueType(0) != MVT::i32 ||
14862           !Trunc->hasOneUse())
14863         return false;
14864       if (RightShift->getOpcode() != ISD::SRL ||
14865           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14866           RightShift->getConstantOperandVal(1) != 32 ||
14867           !RightShift->hasOneUse())
14868         return false;
14869 
14870       SDNode *Trunc2 = *RightShift->use_begin();
14871       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14872           Trunc2->getValueType(0) != MVT::i32 ||
14873           !Trunc2->hasOneUse())
14874         return false;
14875 
14876       SDNode *Bitcast = *Trunc->use_begin();
14877       SDNode *Bitcast2 = *Trunc2->use_begin();
14878 
14879       if (Bitcast->getOpcode() != ISD::BITCAST ||
14880           Bitcast->getValueType(0) != MVT::f32)
14881         return false;
14882       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14883           Bitcast2->getValueType(0) != MVT::f32)
14884         return false;
14885 
14886       if (Subtarget.isLittleEndian())
14887         std::swap(Bitcast, Bitcast2);
14888 
14889       // Bitcast has the second float (in memory-layout order) and Bitcast2
14890       // has the first one.
14891 
14892       SDValue BasePtr = LD->getBasePtr();
14893       if (LD->isIndexed()) {
14894         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14895                "Non-pre-inc AM on PPC?");
14896         BasePtr =
14897           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14898                       LD->getOffset());
14899       }
14900 
14901       auto MMOFlags =
14902           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14903       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14904                                       LD->getPointerInfo(), LD->getAlignment(),
14905                                       MMOFlags, LD->getAAInfo());
14906       SDValue AddPtr =
14907         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14908                     BasePtr, DAG.getIntPtrConstant(4, dl));
14909       SDValue FloatLoad2 = DAG.getLoad(
14910           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14911           LD->getPointerInfo().getWithOffset(4),
14912           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14913 
14914       if (LD->isIndexed()) {
14915         // Note that DAGCombine should re-form any pre-increment load(s) from
14916         // what is produced here if that makes sense.
14917         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14918       }
14919 
14920       DCI.CombineTo(Bitcast2, FloatLoad);
14921       DCI.CombineTo(Bitcast, FloatLoad2);
14922 
14923       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14924                                     SDValue(FloatLoad2.getNode(), 1));
14925       return true;
14926     };
14927 
14928     if (ReplaceTwoFloatLoad())
14929       return SDValue(N, 0);
14930 
14931     EVT MemVT = LD->getMemoryVT();
14932     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14933     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14934     if (LD->isUnindexed() && VT.isVector() &&
14935         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14936           // P8 and later hardware should just use LOAD.
14937           !Subtarget.hasP8Vector() &&
14938           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14939            VT == MVT::v4f32))) &&
14940         LD->getAlign() < ABIAlignment) {
14941       // This is a type-legal unaligned Altivec load.
14942       SDValue Chain = LD->getChain();
14943       SDValue Ptr = LD->getBasePtr();
14944       bool isLittleEndian = Subtarget.isLittleEndian();
14945 
14946       // This implements the loading of unaligned vectors as described in
14947       // the venerable Apple Velocity Engine overview. Specifically:
14948       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14949       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14950       //
14951       // The general idea is to expand a sequence of one or more unaligned
14952       // loads into an alignment-based permutation-control instruction (lvsl
14953       // or lvsr), a series of regular vector loads (which always truncate
14954       // their input address to an aligned address), and a series of
14955       // permutations.  The results of these permutations are the requested
14956       // loaded values.  The trick is that the last "extra" load is not taken
14957       // from the address you might suspect (sizeof(vector) bytes after the
14958       // last requested load), but rather sizeof(vector) - 1 bytes after the
14959       // last requested vector. The point of this is to avoid a page fault if
14960       // the base address happened to be aligned. This works because if the
14961       // base address is aligned, then adding less than a full vector length
14962       // will cause the last vector in the sequence to be (re)loaded.
14963       // Otherwise, the next vector will be fetched as you might suspect was
14964       // necessary.
14965 
14966       // We might be able to reuse the permutation generation from
14967       // a different base address offset from this one by an aligned amount.
14968       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14969       // optimization later.
14970       Intrinsic::ID Intr, IntrLD, IntrPerm;
14971       MVT PermCntlTy, PermTy, LDTy;
14972       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14973                             : Intrinsic::ppc_altivec_lvsl;
14974       IntrLD = Intrinsic::ppc_altivec_lvx;
14975       IntrPerm = Intrinsic::ppc_altivec_vperm;
14976       PermCntlTy = MVT::v16i8;
14977       PermTy = MVT::v4i32;
14978       LDTy = MVT::v4i32;
14979 
14980       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14981 
14982       // Create the new MMO for the new base load. It is like the original MMO,
14983       // but represents an area in memory almost twice the vector size centered
14984       // on the original address. If the address is unaligned, we might start
14985       // reading up to (sizeof(vector)-1) bytes below the address of the
14986       // original unaligned load.
14987       MachineFunction &MF = DAG.getMachineFunction();
14988       MachineMemOperand *BaseMMO =
14989         MF.getMachineMemOperand(LD->getMemOperand(),
14990                                 -(long)MemVT.getStoreSize()+1,
14991                                 2*MemVT.getStoreSize()-1);
14992 
14993       // Create the new base load.
14994       SDValue LDXIntID =
14995           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14996       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14997       SDValue BaseLoad =
14998         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14999                                 DAG.getVTList(PermTy, MVT::Other),
15000                                 BaseLoadOps, LDTy, BaseMMO);
15001 
15002       // Note that the value of IncOffset (which is provided to the next
15003       // load's pointer info offset value, and thus used to calculate the
15004       // alignment), and the value of IncValue (which is actually used to
15005       // increment the pointer value) are different! This is because we
15006       // require the next load to appear to be aligned, even though it
15007       // is actually offset from the base pointer by a lesser amount.
15008       int IncOffset = VT.getSizeInBits() / 8;
15009       int IncValue = IncOffset;
15010 
15011       // Walk (both up and down) the chain looking for another load at the real
15012       // (aligned) offset (the alignment of the other load does not matter in
15013       // this case). If found, then do not use the offset reduction trick, as
15014       // that will prevent the loads from being later combined (as they would
15015       // otherwise be duplicates).
15016       if (!findConsecutiveLoad(LD, DAG))
15017         --IncValue;
15018 
15019       SDValue Increment =
15020           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15021       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15022 
15023       MachineMemOperand *ExtraMMO =
15024         MF.getMachineMemOperand(LD->getMemOperand(),
15025                                 1, 2*MemVT.getStoreSize()-1);
15026       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15027       SDValue ExtraLoad =
15028         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15029                                 DAG.getVTList(PermTy, MVT::Other),
15030                                 ExtraLoadOps, LDTy, ExtraMMO);
15031 
15032       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15033         BaseLoad.getValue(1), ExtraLoad.getValue(1));
15034 
15035       // Because vperm has a big-endian bias, we must reverse the order
15036       // of the input vectors and complement the permute control vector
15037       // when generating little endian code.  We have already handled the
15038       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15039       // and ExtraLoad here.
15040       SDValue Perm;
15041       if (isLittleEndian)
15042         Perm = BuildIntrinsicOp(IntrPerm,
15043                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15044       else
15045         Perm = BuildIntrinsicOp(IntrPerm,
15046                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15047 
15048       if (VT != PermTy)
15049         Perm = Subtarget.hasAltivec()
15050                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15051                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15052                                  DAG.getTargetConstant(1, dl, MVT::i64));
15053                                // second argument is 1 because this rounding
15054                                // is always exact.
15055 
15056       // The output of the permutation is our loaded result, the TokenFactor is
15057       // our new chain.
15058       DCI.CombineTo(N, Perm, TF);
15059       return SDValue(N, 0);
15060     }
15061     }
15062     break;
15063     case ISD::INTRINSIC_WO_CHAIN: {
15064       bool isLittleEndian = Subtarget.isLittleEndian();
15065       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15066       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15067                                            : Intrinsic::ppc_altivec_lvsl);
15068       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15069         SDValue Add = N->getOperand(1);
15070 
15071         int Bits = 4 /* 16 byte alignment */;
15072 
15073         if (DAG.MaskedValueIsZero(Add->getOperand(1),
15074                                   APInt::getAllOnesValue(Bits /* alignment */)
15075                                       .zext(Add.getScalarValueSizeInBits()))) {
15076           SDNode *BasePtr = Add->getOperand(0).getNode();
15077           for (SDNode::use_iterator UI = BasePtr->use_begin(),
15078                                     UE = BasePtr->use_end();
15079                UI != UE; ++UI) {
15080             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15081                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15082                     IID) {
15083               // We've found another LVSL/LVSR, and this address is an aligned
15084               // multiple of that one. The results will be the same, so use the
15085               // one we've just found instead.
15086 
15087               return SDValue(*UI, 0);
15088             }
15089           }
15090         }
15091 
15092         if (isa<ConstantSDNode>(Add->getOperand(1))) {
15093           SDNode *BasePtr = Add->getOperand(0).getNode();
15094           for (SDNode::use_iterator UI = BasePtr->use_begin(),
15095                UE = BasePtr->use_end(); UI != UE; ++UI) {
15096             if (UI->getOpcode() == ISD::ADD &&
15097                 isa<ConstantSDNode>(UI->getOperand(1)) &&
15098                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15099                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15100                 (1ULL << Bits) == 0) {
15101               SDNode *OtherAdd = *UI;
15102               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15103                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
15104                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15105                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15106                   return SDValue(*VI, 0);
15107                 }
15108               }
15109             }
15110           }
15111         }
15112       }
15113 
15114       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15115       // Expose the vabsduw/h/b opportunity for down stream
15116       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15117           (IID == Intrinsic::ppc_altivec_vmaxsw ||
15118            IID == Intrinsic::ppc_altivec_vmaxsh ||
15119            IID == Intrinsic::ppc_altivec_vmaxsb)) {
15120         SDValue V1 = N->getOperand(1);
15121         SDValue V2 = N->getOperand(2);
15122         if ((V1.getSimpleValueType() == MVT::v4i32 ||
15123              V1.getSimpleValueType() == MVT::v8i16 ||
15124              V1.getSimpleValueType() == MVT::v16i8) &&
15125             V1.getSimpleValueType() == V2.getSimpleValueType()) {
15126           // (0-a, a)
15127           if (V1.getOpcode() == ISD::SUB &&
15128               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15129               V1.getOperand(1) == V2) {
15130             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15131           }
15132           // (a, 0-a)
15133           if (V2.getOpcode() == ISD::SUB &&
15134               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15135               V2.getOperand(1) == V1) {
15136             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15137           }
15138           // (x-y, y-x)
15139           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15140               V1.getOperand(0) == V2.getOperand(1) &&
15141               V1.getOperand(1) == V2.getOperand(0)) {
15142             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15143           }
15144         }
15145       }
15146     }
15147 
15148     break;
15149   case ISD::INTRINSIC_W_CHAIN:
15150     // For little endian, VSX loads require generating lxvd2x/xxswapd.
15151     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15152     if (Subtarget.needsSwapsForVSXMemOps()) {
15153       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15154       default:
15155         break;
15156       case Intrinsic::ppc_vsx_lxvw4x:
15157       case Intrinsic::ppc_vsx_lxvd2x:
15158         return expandVSXLoadForLE(N, DCI);
15159       }
15160     }
15161     break;
15162   case ISD::INTRINSIC_VOID:
15163     // For little endian, VSX stores require generating xxswapd/stxvd2x.
15164     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15165     if (Subtarget.needsSwapsForVSXMemOps()) {
15166       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15167       default:
15168         break;
15169       case Intrinsic::ppc_vsx_stxvw4x:
15170       case Intrinsic::ppc_vsx_stxvd2x:
15171         return expandVSXStoreForLE(N, DCI);
15172       }
15173     }
15174     break;
15175   case ISD::BSWAP:
15176     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15177     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15178         N->getOperand(0).hasOneUse() &&
15179         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15180          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15181           N->getValueType(0) == MVT::i64))) {
15182       SDValue Load = N->getOperand(0);
15183       LoadSDNode *LD = cast<LoadSDNode>(Load);
15184       // Create the byte-swapping load.
15185       SDValue Ops[] = {
15186         LD->getChain(),    // Chain
15187         LD->getBasePtr(),  // Ptr
15188         DAG.getValueType(N->getValueType(0)) // VT
15189       };
15190       SDValue BSLoad =
15191         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15192                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15193                                               MVT::i64 : MVT::i32, MVT::Other),
15194                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
15195 
15196       // If this is an i16 load, insert the truncate.
15197       SDValue ResVal = BSLoad;
15198       if (N->getValueType(0) == MVT::i16)
15199         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15200 
15201       // First, combine the bswap away.  This makes the value produced by the
15202       // load dead.
15203       DCI.CombineTo(N, ResVal);
15204 
15205       // Next, combine the load away, we give it a bogus result value but a real
15206       // chain result.  The result value is dead because the bswap is dead.
15207       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15208 
15209       // Return N so it doesn't get rechecked!
15210       return SDValue(N, 0);
15211     }
15212     break;
15213   case PPCISD::VCMP:
15214     // If a VCMP_rec node already exists with exactly the same operands as this
15215     // node, use its result instead of this node (VCMP_rec computes both a CR6
15216     // and a normal output).
15217     //
15218     if (!N->getOperand(0).hasOneUse() &&
15219         !N->getOperand(1).hasOneUse() &&
15220         !N->getOperand(2).hasOneUse()) {
15221 
15222       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15223       SDNode *VCMPrecNode = nullptr;
15224 
15225       SDNode *LHSN = N->getOperand(0).getNode();
15226       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15227            UI != E; ++UI)
15228         if (UI->getOpcode() == PPCISD::VCMP_rec &&
15229             UI->getOperand(1) == N->getOperand(1) &&
15230             UI->getOperand(2) == N->getOperand(2) &&
15231             UI->getOperand(0) == N->getOperand(0)) {
15232           VCMPrecNode = *UI;
15233           break;
15234         }
15235 
15236       // If there is no VCMP_rec node, or if the flag value has a single use,
15237       // don't transform this.
15238       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15239         break;
15240 
15241       // Look at the (necessarily single) use of the flag value.  If it has a
15242       // chain, this transformation is more complex.  Note that multiple things
15243       // could use the value result, which we should ignore.
15244       SDNode *FlagUser = nullptr;
15245       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15246            FlagUser == nullptr; ++UI) {
15247         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15248         SDNode *User = *UI;
15249         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15250           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15251             FlagUser = User;
15252             break;
15253           }
15254         }
15255       }
15256 
15257       // If the user is a MFOCRF instruction, we know this is safe.
15258       // Otherwise we give up for right now.
15259       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15260         return SDValue(VCMPrecNode, 0);
15261     }
15262     break;
15263   case ISD::BRCOND: {
15264     SDValue Cond = N->getOperand(1);
15265     SDValue Target = N->getOperand(2);
15266 
15267     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15268         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15269           Intrinsic::loop_decrement) {
15270 
15271       // We now need to make the intrinsic dead (it cannot be instruction
15272       // selected).
15273       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15274       assert(Cond.getNode()->hasOneUse() &&
15275              "Counter decrement has more than one use");
15276 
15277       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15278                          N->getOperand(0), Target);
15279     }
15280   }
15281   break;
15282   case ISD::BR_CC: {
15283     // If this is a branch on an altivec predicate comparison, lower this so
15284     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
15285     // lowering is done pre-legalize, because the legalizer lowers the predicate
15286     // compare down to code that is difficult to reassemble.
15287     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15288     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15289 
15290     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15291     // value. If so, pass-through the AND to get to the intrinsic.
15292     if (LHS.getOpcode() == ISD::AND &&
15293         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15294         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15295           Intrinsic::loop_decrement &&
15296         isa<ConstantSDNode>(LHS.getOperand(1)) &&
15297         !isNullConstant(LHS.getOperand(1)))
15298       LHS = LHS.getOperand(0);
15299 
15300     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15301         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15302           Intrinsic::loop_decrement &&
15303         isa<ConstantSDNode>(RHS)) {
15304       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15305              "Counter decrement comparison is not EQ or NE");
15306 
15307       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15308       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15309                     (CC == ISD::SETNE && !Val);
15310 
15311       // We now need to make the intrinsic dead (it cannot be instruction
15312       // selected).
15313       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15314       assert(LHS.getNode()->hasOneUse() &&
15315              "Counter decrement has more than one use");
15316 
15317       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15318                          N->getOperand(0), N->getOperand(4));
15319     }
15320 
15321     int CompareOpc;
15322     bool isDot;
15323 
15324     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15325         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15326         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15327       assert(isDot && "Can't compare against a vector result!");
15328 
15329       // If this is a comparison against something other than 0/1, then we know
15330       // that the condition is never/always true.
15331       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15332       if (Val != 0 && Val != 1) {
15333         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
15334           return N->getOperand(0);
15335         // Always !=, turn it into an unconditional branch.
15336         return DAG.getNode(ISD::BR, dl, MVT::Other,
15337                            N->getOperand(0), N->getOperand(4));
15338       }
15339 
15340       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15341 
15342       // Create the PPCISD altivec 'dot' comparison node.
15343       SDValue Ops[] = {
15344         LHS.getOperand(2),  // LHS of compare
15345         LHS.getOperand(3),  // RHS of compare
15346         DAG.getConstant(CompareOpc, dl, MVT::i32)
15347       };
15348       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15349       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15350 
15351       // Unpack the result based on how the target uses it.
15352       PPC::Predicate CompOpc;
15353       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15354       default:  // Can't happen, don't crash on invalid number though.
15355       case 0:   // Branch on the value of the EQ bit of CR6.
15356         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15357         break;
15358       case 1:   // Branch on the inverted value of the EQ bit of CR6.
15359         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15360         break;
15361       case 2:   // Branch on the value of the LT bit of CR6.
15362         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15363         break;
15364       case 3:   // Branch on the inverted value of the LT bit of CR6.
15365         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15366         break;
15367       }
15368 
15369       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15370                          DAG.getConstant(CompOpc, dl, MVT::i32),
15371                          DAG.getRegister(PPC::CR6, MVT::i32),
15372                          N->getOperand(4), CompNode.getValue(1));
15373     }
15374     break;
15375   }
15376   case ISD::BUILD_VECTOR:
15377     return DAGCombineBuildVector(N, DCI);
15378   case ISD::ABS:
15379     return combineABS(N, DCI);
15380   case ISD::VSELECT:
15381     return combineVSelect(N, DCI);
15382   }
15383 
15384   return SDValue();
15385 }
15386 
15387 SDValue
15388 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15389                                  SelectionDAG &DAG,
15390                                  SmallVectorImpl<SDNode *> &Created) const {
15391   // fold (sdiv X, pow2)
15392   EVT VT = N->getValueType(0);
15393   if (VT == MVT::i64 && !Subtarget.isPPC64())
15394     return SDValue();
15395   if ((VT != MVT::i32 && VT != MVT::i64) ||
15396       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15397     return SDValue();
15398 
15399   SDLoc DL(N);
15400   SDValue N0 = N->getOperand(0);
15401 
15402   bool IsNegPow2 = (-Divisor).isPowerOf2();
15403   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15404   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15405 
15406   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15407   Created.push_back(Op.getNode());
15408 
15409   if (IsNegPow2) {
15410     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15411     Created.push_back(Op.getNode());
15412   }
15413 
15414   return Op;
15415 }
15416 
15417 //===----------------------------------------------------------------------===//
15418 // Inline Assembly Support
15419 //===----------------------------------------------------------------------===//
15420 
15421 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15422                                                       KnownBits &Known,
15423                                                       const APInt &DemandedElts,
15424                                                       const SelectionDAG &DAG,
15425                                                       unsigned Depth) const {
15426   Known.resetAll();
15427   switch (Op.getOpcode()) {
15428   default: break;
15429   case PPCISD::LBRX: {
15430     // lhbrx is known to have the top bits cleared out.
15431     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15432       Known.Zero = 0xFFFF0000;
15433     break;
15434   }
15435   case ISD::INTRINSIC_WO_CHAIN: {
15436     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15437     default: break;
15438     case Intrinsic::ppc_altivec_vcmpbfp_p:
15439     case Intrinsic::ppc_altivec_vcmpeqfp_p:
15440     case Intrinsic::ppc_altivec_vcmpequb_p:
15441     case Intrinsic::ppc_altivec_vcmpequh_p:
15442     case Intrinsic::ppc_altivec_vcmpequw_p:
15443     case Intrinsic::ppc_altivec_vcmpequd_p:
15444     case Intrinsic::ppc_altivec_vcmpequq_p:
15445     case Intrinsic::ppc_altivec_vcmpgefp_p:
15446     case Intrinsic::ppc_altivec_vcmpgtfp_p:
15447     case Intrinsic::ppc_altivec_vcmpgtsb_p:
15448     case Intrinsic::ppc_altivec_vcmpgtsh_p:
15449     case Intrinsic::ppc_altivec_vcmpgtsw_p:
15450     case Intrinsic::ppc_altivec_vcmpgtsd_p:
15451     case Intrinsic::ppc_altivec_vcmpgtsq_p:
15452     case Intrinsic::ppc_altivec_vcmpgtub_p:
15453     case Intrinsic::ppc_altivec_vcmpgtuh_p:
15454     case Intrinsic::ppc_altivec_vcmpgtuw_p:
15455     case Intrinsic::ppc_altivec_vcmpgtud_p:
15456     case Intrinsic::ppc_altivec_vcmpgtuq_p:
15457       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
15458       break;
15459     }
15460   }
15461   }
15462 }
15463 
15464 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15465   switch (Subtarget.getCPUDirective()) {
15466   default: break;
15467   case PPC::DIR_970:
15468   case PPC::DIR_PWR4:
15469   case PPC::DIR_PWR5:
15470   case PPC::DIR_PWR5X:
15471   case PPC::DIR_PWR6:
15472   case PPC::DIR_PWR6X:
15473   case PPC::DIR_PWR7:
15474   case PPC::DIR_PWR8:
15475   case PPC::DIR_PWR9:
15476   case PPC::DIR_PWR10:
15477   case PPC::DIR_PWR_FUTURE: {
15478     if (!ML)
15479       break;
15480 
15481     if (!DisableInnermostLoopAlign32) {
15482       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
15483       // so that we can decrease cache misses and branch-prediction misses.
15484       // Actual alignment of the loop will depend on the hotness check and other
15485       // logic in alignBlocks.
15486       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15487         return Align(32);
15488     }
15489 
15490     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15491 
15492     // For small loops (between 5 and 8 instructions), align to a 32-byte
15493     // boundary so that the entire loop fits in one instruction-cache line.
15494     uint64_t LoopSize = 0;
15495     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15496       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15497         LoopSize += TII->getInstSizeInBytes(*J);
15498         if (LoopSize > 32)
15499           break;
15500       }
15501 
15502     if (LoopSize > 16 && LoopSize <= 32)
15503       return Align(32);
15504 
15505     break;
15506   }
15507   }
15508 
15509   return TargetLowering::getPrefLoopAlignment(ML);
15510 }
15511 
15512 /// getConstraintType - Given a constraint, return the type of
15513 /// constraint it is for this target.
15514 PPCTargetLowering::ConstraintType
15515 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15516   if (Constraint.size() == 1) {
15517     switch (Constraint[0]) {
15518     default: break;
15519     case 'b':
15520     case 'r':
15521     case 'f':
15522     case 'd':
15523     case 'v':
15524     case 'y':
15525       return C_RegisterClass;
15526     case 'Z':
15527       // FIXME: While Z does indicate a memory constraint, it specifically
15528       // indicates an r+r address (used in conjunction with the 'y' modifier
15529       // in the replacement string). Currently, we're forcing the base
15530       // register to be r0 in the asm printer (which is interpreted as zero)
15531       // and forming the complete address in the second register. This is
15532       // suboptimal.
15533       return C_Memory;
15534     }
15535   } else if (Constraint == "wc") { // individual CR bits.
15536     return C_RegisterClass;
15537   } else if (Constraint == "wa" || Constraint == "wd" ||
15538              Constraint == "wf" || Constraint == "ws" ||
15539              Constraint == "wi" || Constraint == "ww") {
15540     return C_RegisterClass; // VSX registers.
15541   }
15542   return TargetLowering::getConstraintType(Constraint);
15543 }
15544 
15545 /// Examine constraint type and operand type and determine a weight value.
15546 /// This object must already have been set up with the operand type
15547 /// and the current alternative constraint selected.
15548 TargetLowering::ConstraintWeight
15549 PPCTargetLowering::getSingleConstraintMatchWeight(
15550     AsmOperandInfo &info, const char *constraint) const {
15551   ConstraintWeight weight = CW_Invalid;
15552   Value *CallOperandVal = info.CallOperandVal;
15553     // If we don't have a value, we can't do a match,
15554     // but allow it at the lowest weight.
15555   if (!CallOperandVal)
15556     return CW_Default;
15557   Type *type = CallOperandVal->getType();
15558 
15559   // Look at the constraint type.
15560   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15561     return CW_Register; // an individual CR bit.
15562   else if ((StringRef(constraint) == "wa" ||
15563             StringRef(constraint) == "wd" ||
15564             StringRef(constraint) == "wf") &&
15565            type->isVectorTy())
15566     return CW_Register;
15567   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15568     return CW_Register; // just hold 64-bit integers data.
15569   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15570     return CW_Register;
15571   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15572     return CW_Register;
15573 
15574   switch (*constraint) {
15575   default:
15576     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15577     break;
15578   case 'b':
15579     if (type->isIntegerTy())
15580       weight = CW_Register;
15581     break;
15582   case 'f':
15583     if (type->isFloatTy())
15584       weight = CW_Register;
15585     break;
15586   case 'd':
15587     if (type->isDoubleTy())
15588       weight = CW_Register;
15589     break;
15590   case 'v':
15591     if (type->isVectorTy())
15592       weight = CW_Register;
15593     break;
15594   case 'y':
15595     weight = CW_Register;
15596     break;
15597   case 'Z':
15598     weight = CW_Memory;
15599     break;
15600   }
15601   return weight;
15602 }
15603 
15604 std::pair<unsigned, const TargetRegisterClass *>
15605 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15606                                                 StringRef Constraint,
15607                                                 MVT VT) const {
15608   if (Constraint.size() == 1) {
15609     // GCC RS6000 Constraint Letters
15610     switch (Constraint[0]) {
15611     case 'b':   // R1-R31
15612       if (VT == MVT::i64 && Subtarget.isPPC64())
15613         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15614       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15615     case 'r':   // R0-R31
15616       if (VT == MVT::i64 && Subtarget.isPPC64())
15617         return std::make_pair(0U, &PPC::G8RCRegClass);
15618       return std::make_pair(0U, &PPC::GPRCRegClass);
15619     // 'd' and 'f' constraints are both defined to be "the floating point
15620     // registers", where one is for 32-bit and the other for 64-bit. We don't
15621     // really care overly much here so just give them all the same reg classes.
15622     case 'd':
15623     case 'f':
15624       if (Subtarget.hasSPE()) {
15625         if (VT == MVT::f32 || VT == MVT::i32)
15626           return std::make_pair(0U, &PPC::GPRCRegClass);
15627         if (VT == MVT::f64 || VT == MVT::i64)
15628           return std::make_pair(0U, &PPC::SPERCRegClass);
15629       } else {
15630         if (VT == MVT::f32 || VT == MVT::i32)
15631           return std::make_pair(0U, &PPC::F4RCRegClass);
15632         if (VT == MVT::f64 || VT == MVT::i64)
15633           return std::make_pair(0U, &PPC::F8RCRegClass);
15634       }
15635       break;
15636     case 'v':
15637       if (Subtarget.hasAltivec())
15638         return std::make_pair(0U, &PPC::VRRCRegClass);
15639       break;
15640     case 'y':   // crrc
15641       return std::make_pair(0U, &PPC::CRRCRegClass);
15642     }
15643   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15644     // An individual CR bit.
15645     return std::make_pair(0U, &PPC::CRBITRCRegClass);
15646   } else if ((Constraint == "wa" || Constraint == "wd" ||
15647              Constraint == "wf" || Constraint == "wi") &&
15648              Subtarget.hasVSX()) {
15649     return std::make_pair(0U, &PPC::VSRCRegClass);
15650   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15651     if (VT == MVT::f32 && Subtarget.hasP8Vector())
15652       return std::make_pair(0U, &PPC::VSSRCRegClass);
15653     else
15654       return std::make_pair(0U, &PPC::VSFRCRegClass);
15655   }
15656 
15657   // If we name a VSX register, we can't defer to the base class because it
15658   // will not recognize the correct register (their names will be VSL{0-31}
15659   // and V{0-31} so they won't match). So we match them here.
15660   if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15661     int VSNum = atoi(Constraint.data() + 3);
15662     assert(VSNum >= 0 && VSNum <= 63 &&
15663            "Attempted to access a vsr out of range");
15664     if (VSNum < 32)
15665       return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15666     return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15667   }
15668   std::pair<unsigned, const TargetRegisterClass *> R =
15669       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15670 
15671   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15672   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15673   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15674   // register.
15675   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15676   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15677   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15678       PPC::GPRCRegClass.contains(R.first))
15679     return std::make_pair(TRI->getMatchingSuperReg(R.first,
15680                             PPC::sub_32, &PPC::G8RCRegClass),
15681                           &PPC::G8RCRegClass);
15682 
15683   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15684   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15685     R.first = PPC::CR0;
15686     R.second = &PPC::CRRCRegClass;
15687   }
15688 
15689   return R;
15690 }
15691 
15692 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15693 /// vector.  If it is invalid, don't add anything to Ops.
15694 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15695                                                      std::string &Constraint,
15696                                                      std::vector<SDValue>&Ops,
15697                                                      SelectionDAG &DAG) const {
15698   SDValue Result;
15699 
15700   // Only support length 1 constraints.
15701   if (Constraint.length() > 1) return;
15702 
15703   char Letter = Constraint[0];
15704   switch (Letter) {
15705   default: break;
15706   case 'I':
15707   case 'J':
15708   case 'K':
15709   case 'L':
15710   case 'M':
15711   case 'N':
15712   case 'O':
15713   case 'P': {
15714     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15715     if (!CST) return; // Must be an immediate to match.
15716     SDLoc dl(Op);
15717     int64_t Value = CST->getSExtValue();
15718     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15719                          // numbers are printed as such.
15720     switch (Letter) {
15721     default: llvm_unreachable("Unknown constraint letter!");
15722     case 'I':  // "I" is a signed 16-bit constant.
15723       if (isInt<16>(Value))
15724         Result = DAG.getTargetConstant(Value, dl, TCVT);
15725       break;
15726     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15727       if (isShiftedUInt<16, 16>(Value))
15728         Result = DAG.getTargetConstant(Value, dl, TCVT);
15729       break;
15730     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15731       if (isShiftedInt<16, 16>(Value))
15732         Result = DAG.getTargetConstant(Value, dl, TCVT);
15733       break;
15734     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15735       if (isUInt<16>(Value))
15736         Result = DAG.getTargetConstant(Value, dl, TCVT);
15737       break;
15738     case 'M':  // "M" is a constant that is greater than 31.
15739       if (Value > 31)
15740         Result = DAG.getTargetConstant(Value, dl, TCVT);
15741       break;
15742     case 'N':  // "N" is a positive constant that is an exact power of two.
15743       if (Value > 0 && isPowerOf2_64(Value))
15744         Result = DAG.getTargetConstant(Value, dl, TCVT);
15745       break;
15746     case 'O':  // "O" is the constant zero.
15747       if (Value == 0)
15748         Result = DAG.getTargetConstant(Value, dl, TCVT);
15749       break;
15750     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15751       if (isInt<16>(-Value))
15752         Result = DAG.getTargetConstant(Value, dl, TCVT);
15753       break;
15754     }
15755     break;
15756   }
15757   }
15758 
15759   if (Result.getNode()) {
15760     Ops.push_back(Result);
15761     return;
15762   }
15763 
15764   // Handle standard constraint letters.
15765   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15766 }
15767 
15768 // isLegalAddressingMode - Return true if the addressing mode represented
15769 // by AM is legal for this target, for a load/store of the specified type.
15770 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15771                                               const AddrMode &AM, Type *Ty,
15772                                               unsigned AS,
15773                                               Instruction *I) const {
15774   // Vector type r+i form is supported since power9 as DQ form. We don't check
15775   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15776   // imm form is preferred and the offset can be adjusted to use imm form later
15777   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15778   // max offset to check legal addressing mode, we should be a little aggressive
15779   // to contain other offsets for that LSRUse.
15780   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15781     return false;
15782 
15783   // PPC allows a sign-extended 16-bit immediate field.
15784   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15785     return false;
15786 
15787   // No global is ever allowed as a base.
15788   if (AM.BaseGV)
15789     return false;
15790 
15791   // PPC only support r+r,
15792   switch (AM.Scale) {
15793   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15794     break;
15795   case 1:
15796     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15797       return false;
15798     // Otherwise we have r+r or r+i.
15799     break;
15800   case 2:
15801     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15802       return false;
15803     // Allow 2*r as r+r.
15804     break;
15805   default:
15806     // No other scales are supported.
15807     return false;
15808   }
15809 
15810   return true;
15811 }
15812 
15813 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15814                                            SelectionDAG &DAG) const {
15815   MachineFunction &MF = DAG.getMachineFunction();
15816   MachineFrameInfo &MFI = MF.getFrameInfo();
15817   MFI.setReturnAddressIsTaken(true);
15818 
15819   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15820     return SDValue();
15821 
15822   SDLoc dl(Op);
15823   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15824 
15825   // Make sure the function does not optimize away the store of the RA to
15826   // the stack.
15827   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15828   FuncInfo->setLRStoreRequired();
15829   bool isPPC64 = Subtarget.isPPC64();
15830   auto PtrVT = getPointerTy(MF.getDataLayout());
15831 
15832   if (Depth > 0) {
15833     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15834     SDValue Offset =
15835         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15836                         isPPC64 ? MVT::i64 : MVT::i32);
15837     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15838                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15839                        MachinePointerInfo());
15840   }
15841 
15842   // Just load the return address off the stack.
15843   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15844   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15845                      MachinePointerInfo());
15846 }
15847 
15848 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15849                                           SelectionDAG &DAG) const {
15850   SDLoc dl(Op);
15851   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15852 
15853   MachineFunction &MF = DAG.getMachineFunction();
15854   MachineFrameInfo &MFI = MF.getFrameInfo();
15855   MFI.setFrameAddressIsTaken(true);
15856 
15857   EVT PtrVT = getPointerTy(MF.getDataLayout());
15858   bool isPPC64 = PtrVT == MVT::i64;
15859 
15860   // Naked functions never have a frame pointer, and so we use r1. For all
15861   // other functions, this decision must be delayed until during PEI.
15862   unsigned FrameReg;
15863   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15864     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15865   else
15866     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15867 
15868   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15869                                          PtrVT);
15870   while (Depth--)
15871     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15872                             FrameAddr, MachinePointerInfo());
15873   return FrameAddr;
15874 }
15875 
15876 // FIXME? Maybe this could be a TableGen attribute on some registers and
15877 // this table could be generated automatically from RegInfo.
15878 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15879                                               const MachineFunction &MF) const {
15880   bool isPPC64 = Subtarget.isPPC64();
15881 
15882   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15883   if (!is64Bit && VT != LLT::scalar(32))
15884     report_fatal_error("Invalid register global variable type");
15885 
15886   Register Reg = StringSwitch<Register>(RegName)
15887                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15888                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15889                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15890                      .Default(Register());
15891 
15892   if (Reg)
15893     return Reg;
15894   report_fatal_error("Invalid register name global variable");
15895 }
15896 
15897 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15898   // 32-bit SVR4 ABI access everything as got-indirect.
15899   if (Subtarget.is32BitELFABI())
15900     return true;
15901 
15902   // AIX accesses everything indirectly through the TOC, which is similar to
15903   // the GOT.
15904   if (Subtarget.isAIXABI())
15905     return true;
15906 
15907   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15908   // If it is small or large code model, module locals are accessed
15909   // indirectly by loading their address from .toc/.got.
15910   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15911     return true;
15912 
15913   // JumpTable and BlockAddress are accessed as got-indirect.
15914   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15915     return true;
15916 
15917   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15918     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15919 
15920   return false;
15921 }
15922 
15923 bool
15924 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15925   // The PowerPC target isn't yet aware of offsets.
15926   return false;
15927 }
15928 
15929 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15930                                            const CallInst &I,
15931                                            MachineFunction &MF,
15932                                            unsigned Intrinsic) const {
15933   switch (Intrinsic) {
15934   case Intrinsic::ppc_altivec_lvx:
15935   case Intrinsic::ppc_altivec_lvxl:
15936   case Intrinsic::ppc_altivec_lvebx:
15937   case Intrinsic::ppc_altivec_lvehx:
15938   case Intrinsic::ppc_altivec_lvewx:
15939   case Intrinsic::ppc_vsx_lxvd2x:
15940   case Intrinsic::ppc_vsx_lxvw4x:
15941   case Intrinsic::ppc_vsx_lxvd2x_be:
15942   case Intrinsic::ppc_vsx_lxvw4x_be:
15943   case Intrinsic::ppc_vsx_lxvl:
15944   case Intrinsic::ppc_vsx_lxvll: {
15945     EVT VT;
15946     switch (Intrinsic) {
15947     case Intrinsic::ppc_altivec_lvebx:
15948       VT = MVT::i8;
15949       break;
15950     case Intrinsic::ppc_altivec_lvehx:
15951       VT = MVT::i16;
15952       break;
15953     case Intrinsic::ppc_altivec_lvewx:
15954       VT = MVT::i32;
15955       break;
15956     case Intrinsic::ppc_vsx_lxvd2x:
15957     case Intrinsic::ppc_vsx_lxvd2x_be:
15958       VT = MVT::v2f64;
15959       break;
15960     default:
15961       VT = MVT::v4i32;
15962       break;
15963     }
15964 
15965     Info.opc = ISD::INTRINSIC_W_CHAIN;
15966     Info.memVT = VT;
15967     Info.ptrVal = I.getArgOperand(0);
15968     Info.offset = -VT.getStoreSize()+1;
15969     Info.size = 2*VT.getStoreSize()-1;
15970     Info.align = Align(1);
15971     Info.flags = MachineMemOperand::MOLoad;
15972     return true;
15973   }
15974   case Intrinsic::ppc_altivec_stvx:
15975   case Intrinsic::ppc_altivec_stvxl:
15976   case Intrinsic::ppc_altivec_stvebx:
15977   case Intrinsic::ppc_altivec_stvehx:
15978   case Intrinsic::ppc_altivec_stvewx:
15979   case Intrinsic::ppc_vsx_stxvd2x:
15980   case Intrinsic::ppc_vsx_stxvw4x:
15981   case Intrinsic::ppc_vsx_stxvd2x_be:
15982   case Intrinsic::ppc_vsx_stxvw4x_be:
15983   case Intrinsic::ppc_vsx_stxvl:
15984   case Intrinsic::ppc_vsx_stxvll: {
15985     EVT VT;
15986     switch (Intrinsic) {
15987     case Intrinsic::ppc_altivec_stvebx:
15988       VT = MVT::i8;
15989       break;
15990     case Intrinsic::ppc_altivec_stvehx:
15991       VT = MVT::i16;
15992       break;
15993     case Intrinsic::ppc_altivec_stvewx:
15994       VT = MVT::i32;
15995       break;
15996     case Intrinsic::ppc_vsx_stxvd2x:
15997     case Intrinsic::ppc_vsx_stxvd2x_be:
15998       VT = MVT::v2f64;
15999       break;
16000     default:
16001       VT = MVT::v4i32;
16002       break;
16003     }
16004 
16005     Info.opc = ISD::INTRINSIC_VOID;
16006     Info.memVT = VT;
16007     Info.ptrVal = I.getArgOperand(1);
16008     Info.offset = -VT.getStoreSize()+1;
16009     Info.size = 2*VT.getStoreSize()-1;
16010     Info.align = Align(1);
16011     Info.flags = MachineMemOperand::MOStore;
16012     return true;
16013   }
16014   default:
16015     break;
16016   }
16017 
16018   return false;
16019 }
16020 
16021 /// It returns EVT::Other if the type should be determined using generic
16022 /// target-independent logic.
16023 EVT PPCTargetLowering::getOptimalMemOpType(
16024     const MemOp &Op, const AttributeList &FuncAttributes) const {
16025   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16026     // We should use Altivec/VSX loads and stores when available. For unaligned
16027     // addresses, unaligned VSX loads are only fast starting with the P8.
16028     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16029         (Op.isAligned(Align(16)) ||
16030          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16031       return MVT::v4i32;
16032   }
16033 
16034   if (Subtarget.isPPC64()) {
16035     return MVT::i64;
16036   }
16037 
16038   return MVT::i32;
16039 }
16040 
16041 /// Returns true if it is beneficial to convert a load of a constant
16042 /// to just the constant itself.
16043 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16044                                                           Type *Ty) const {
16045   assert(Ty->isIntegerTy());
16046 
16047   unsigned BitSize = Ty->getPrimitiveSizeInBits();
16048   return !(BitSize == 0 || BitSize > 64);
16049 }
16050 
16051 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16052   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16053     return false;
16054   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16055   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16056   return NumBits1 == 64 && NumBits2 == 32;
16057 }
16058 
16059 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16060   if (!VT1.isInteger() || !VT2.isInteger())
16061     return false;
16062   unsigned NumBits1 = VT1.getSizeInBits();
16063   unsigned NumBits2 = VT2.getSizeInBits();
16064   return NumBits1 == 64 && NumBits2 == 32;
16065 }
16066 
16067 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16068   // Generally speaking, zexts are not free, but they are free when they can be
16069   // folded with other operations.
16070   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16071     EVT MemVT = LD->getMemoryVT();
16072     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16073          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16074         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16075          LD->getExtensionType() == ISD::ZEXTLOAD))
16076       return true;
16077   }
16078 
16079   // FIXME: Add other cases...
16080   //  - 32-bit shifts with a zext to i64
16081   //  - zext after ctlz, bswap, etc.
16082   //  - zext after and by a constant mask
16083 
16084   return TargetLowering::isZExtFree(Val, VT2);
16085 }
16086 
16087 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16088   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16089          "invalid fpext types");
16090   // Extending to float128 is not free.
16091   if (DestVT == MVT::f128)
16092     return false;
16093   return true;
16094 }
16095 
16096 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16097   return isInt<16>(Imm) || isUInt<16>(Imm);
16098 }
16099 
16100 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16101   return isInt<16>(Imm) || isUInt<16>(Imm);
16102 }
16103 
16104 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16105                                                        unsigned,
16106                                                        unsigned,
16107                                                        MachineMemOperand::Flags,
16108                                                        bool *Fast) const {
16109   if (DisablePPCUnaligned)
16110     return false;
16111 
16112   // PowerPC supports unaligned memory access for simple non-vector types.
16113   // Although accessing unaligned addresses is not as efficient as accessing
16114   // aligned addresses, it is generally more efficient than manual expansion,
16115   // and generally only traps for software emulation when crossing page
16116   // boundaries.
16117 
16118   if (!VT.isSimple())
16119     return false;
16120 
16121   if (VT.isFloatingPoint() && !VT.isVector() &&
16122       !Subtarget.allowsUnalignedFPAccess())
16123     return false;
16124 
16125   if (VT.getSimpleVT().isVector()) {
16126     if (Subtarget.hasVSX()) {
16127       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16128           VT != MVT::v4f32 && VT != MVT::v4i32)
16129         return false;
16130     } else {
16131       return false;
16132     }
16133   }
16134 
16135   if (VT == MVT::ppcf128)
16136     return false;
16137 
16138   if (Fast)
16139     *Fast = true;
16140 
16141   return true;
16142 }
16143 
16144 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16145                                                SDValue C) const {
16146   // Check integral scalar types.
16147   if (!VT.isScalarInteger())
16148     return false;
16149   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16150     if (!ConstNode->getAPIntValue().isSignedIntN(64))
16151       return false;
16152     // This transformation will generate >= 2 operations. But the following
16153     // cases will generate <= 2 instructions during ISEL. So exclude them.
16154     // 1. If the constant multiplier fits 16 bits, it can be handled by one
16155     // HW instruction, ie. MULLI
16156     // 2. If the multiplier after shifted fits 16 bits, an extra shift
16157     // instruction is needed than case 1, ie. MULLI and RLDICR
16158     int64_t Imm = ConstNode->getSExtValue();
16159     unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16160     Imm >>= Shift;
16161     if (isInt<16>(Imm))
16162       return false;
16163     uint64_t UImm = static_cast<uint64_t>(Imm);
16164     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16165         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16166       return true;
16167   }
16168   return false;
16169 }
16170 
16171 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16172                                                    EVT VT) const {
16173   return isFMAFasterThanFMulAndFAdd(
16174       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16175 }
16176 
16177 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16178                                                    Type *Ty) const {
16179   switch (Ty->getScalarType()->getTypeID()) {
16180   case Type::FloatTyID:
16181   case Type::DoubleTyID:
16182     return true;
16183   case Type::FP128TyID:
16184     return Subtarget.hasP9Vector();
16185   default:
16186     return false;
16187   }
16188 }
16189 
16190 // FIXME: add more patterns which are not profitable to hoist.
16191 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16192   if (!I->hasOneUse())
16193     return true;
16194 
16195   Instruction *User = I->user_back();
16196   assert(User && "A single use instruction with no uses.");
16197 
16198   switch (I->getOpcode()) {
16199   case Instruction::FMul: {
16200     // Don't break FMA, PowerPC prefers FMA.
16201     if (User->getOpcode() != Instruction::FSub &&
16202         User->getOpcode() != Instruction::FAdd)
16203       return true;
16204 
16205     const TargetOptions &Options = getTargetMachine().Options;
16206     const Function *F = I->getFunction();
16207     const DataLayout &DL = F->getParent()->getDataLayout();
16208     Type *Ty = User->getOperand(0)->getType();
16209 
16210     return !(
16211         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16212         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16213         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16214   }
16215   case Instruction::Load: {
16216     // Don't break "store (load float*)" pattern, this pattern will be combined
16217     // to "store (load int32)" in later InstCombine pass. See function
16218     // combineLoadToOperationType. On PowerPC, loading a float point takes more
16219     // cycles than loading a 32 bit integer.
16220     LoadInst *LI = cast<LoadInst>(I);
16221     // For the loads that combineLoadToOperationType does nothing, like
16222     // ordered load, it should be profitable to hoist them.
16223     // For swifterror load, it can only be used for pointer to pointer type, so
16224     // later type check should get rid of this case.
16225     if (!LI->isUnordered())
16226       return true;
16227 
16228     if (User->getOpcode() != Instruction::Store)
16229       return true;
16230 
16231     if (I->getType()->getTypeID() != Type::FloatTyID)
16232       return true;
16233 
16234     return false;
16235   }
16236   default:
16237     return true;
16238   }
16239   return true;
16240 }
16241 
16242 const MCPhysReg *
16243 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16244   // LR is a callee-save register, but we must treat it as clobbered by any call
16245   // site. Hence we include LR in the scratch registers, which are in turn added
16246   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16247   // to CTR, which is used by any indirect call.
16248   static const MCPhysReg ScratchRegs[] = {
16249     PPC::X12, PPC::LR8, PPC::CTR8, 0
16250   };
16251 
16252   return ScratchRegs;
16253 }
16254 
16255 Register PPCTargetLowering::getExceptionPointerRegister(
16256     const Constant *PersonalityFn) const {
16257   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16258 }
16259 
16260 Register PPCTargetLowering::getExceptionSelectorRegister(
16261     const Constant *PersonalityFn) const {
16262   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16263 }
16264 
16265 bool
16266 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16267                      EVT VT , unsigned DefinedValues) const {
16268   if (VT == MVT::v2i64)
16269     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16270 
16271   if (Subtarget.hasVSX())
16272     return true;
16273 
16274   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16275 }
16276 
16277 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16278   if (DisableILPPref || Subtarget.enableMachineScheduler())
16279     return TargetLowering::getSchedulingPreference(N);
16280 
16281   return Sched::ILP;
16282 }
16283 
16284 // Create a fast isel object.
16285 FastISel *
16286 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16287                                   const TargetLibraryInfo *LibInfo) const {
16288   return PPC::createFastISel(FuncInfo, LibInfo);
16289 }
16290 
16291 // 'Inverted' means the FMA opcode after negating one multiplicand.
16292 // For example, (fma -a b c) = (fnmsub a b c)
16293 static unsigned invertFMAOpcode(unsigned Opc) {
16294   switch (Opc) {
16295   default:
16296     llvm_unreachable("Invalid FMA opcode for PowerPC!");
16297   case ISD::FMA:
16298     return PPCISD::FNMSUB;
16299   case PPCISD::FNMSUB:
16300     return ISD::FMA;
16301   }
16302 }
16303 
16304 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16305                                                 bool LegalOps, bool OptForSize,
16306                                                 NegatibleCost &Cost,
16307                                                 unsigned Depth) const {
16308   if (Depth > SelectionDAG::MaxRecursionDepth)
16309     return SDValue();
16310 
16311   unsigned Opc = Op.getOpcode();
16312   EVT VT = Op.getValueType();
16313   SDNodeFlags Flags = Op.getNode()->getFlags();
16314 
16315   switch (Opc) {
16316   case PPCISD::FNMSUB:
16317     if (!Op.hasOneUse() || !isTypeLegal(VT))
16318       break;
16319 
16320     const TargetOptions &Options = getTargetMachine().Options;
16321     SDValue N0 = Op.getOperand(0);
16322     SDValue N1 = Op.getOperand(1);
16323     SDValue N2 = Op.getOperand(2);
16324     SDLoc Loc(Op);
16325 
16326     NegatibleCost N2Cost = NegatibleCost::Expensive;
16327     SDValue NegN2 =
16328         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16329 
16330     if (!NegN2)
16331       return SDValue();
16332 
16333     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16334     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16335     // These transformations may change sign of zeroes. For example,
16336     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16337     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16338       // Try and choose the cheaper one to negate.
16339       NegatibleCost N0Cost = NegatibleCost::Expensive;
16340       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16341                                            N0Cost, Depth + 1);
16342 
16343       NegatibleCost N1Cost = NegatibleCost::Expensive;
16344       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16345                                            N1Cost, Depth + 1);
16346 
16347       if (NegN0 && N0Cost <= N1Cost) {
16348         Cost = std::min(N0Cost, N2Cost);
16349         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16350       } else if (NegN1) {
16351         Cost = std::min(N1Cost, N2Cost);
16352         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16353       }
16354     }
16355 
16356     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16357     if (isOperationLegal(ISD::FMA, VT)) {
16358       Cost = N2Cost;
16359       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16360     }
16361 
16362     break;
16363   }
16364 
16365   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16366                                               Cost, Depth);
16367 }
16368 
16369 // Override to enable LOAD_STACK_GUARD lowering on Linux.
16370 bool PPCTargetLowering::useLoadStackGuardNode() const {
16371   if (!Subtarget.isTargetLinux())
16372     return TargetLowering::useLoadStackGuardNode();
16373   return true;
16374 }
16375 
16376 // Override to disable global variable loading on Linux.
16377 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16378   if (!Subtarget.isTargetLinux())
16379     return TargetLowering::insertSSPDeclarations(M);
16380 }
16381 
16382 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16383                                      bool ForCodeSize) const {
16384   if (!VT.isSimple() || !Subtarget.hasVSX())
16385     return false;
16386 
16387   switch(VT.getSimpleVT().SimpleTy) {
16388   default:
16389     // For FP types that are currently not supported by PPC backend, return
16390     // false. Examples: f16, f80.
16391     return false;
16392   case MVT::f32:
16393   case MVT::f64:
16394     if (Subtarget.hasPrefixInstrs()) {
16395       // With prefixed instructions, we can materialize anything that can be
16396       // represented with a 32-bit immediate, not just positive zero.
16397       APFloat APFloatOfImm = Imm;
16398       return convertToNonDenormSingle(APFloatOfImm);
16399     }
16400     LLVM_FALLTHROUGH;
16401   case MVT::ppcf128:
16402     return Imm.isPosZero();
16403   }
16404 }
16405 
16406 // For vector shift operation op, fold
16407 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16408 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16409                                   SelectionDAG &DAG) {
16410   SDValue N0 = N->getOperand(0);
16411   SDValue N1 = N->getOperand(1);
16412   EVT VT = N0.getValueType();
16413   unsigned OpSizeInBits = VT.getScalarSizeInBits();
16414   unsigned Opcode = N->getOpcode();
16415   unsigned TargetOpcode;
16416 
16417   switch (Opcode) {
16418   default:
16419     llvm_unreachable("Unexpected shift operation");
16420   case ISD::SHL:
16421     TargetOpcode = PPCISD::SHL;
16422     break;
16423   case ISD::SRL:
16424     TargetOpcode = PPCISD::SRL;
16425     break;
16426   case ISD::SRA:
16427     TargetOpcode = PPCISD::SRA;
16428     break;
16429   }
16430 
16431   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16432       N1->getOpcode() == ISD::AND)
16433     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16434       if (Mask->getZExtValue() == OpSizeInBits - 1)
16435         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16436 
16437   return SDValue();
16438 }
16439 
16440 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16441   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16442     return Value;
16443 
16444   SDValue N0 = N->getOperand(0);
16445   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16446   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16447       N0.getOpcode() != ISD::SIGN_EXTEND ||
16448       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16449       N->getValueType(0) != MVT::i64)
16450     return SDValue();
16451 
16452   // We can't save an operation here if the value is already extended, and
16453   // the existing shift is easier to combine.
16454   SDValue ExtsSrc = N0.getOperand(0);
16455   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16456       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16457     return SDValue();
16458 
16459   SDLoc DL(N0);
16460   SDValue ShiftBy = SDValue(CN1, 0);
16461   // We want the shift amount to be i32 on the extswli, but the shift could
16462   // have an i64.
16463   if (ShiftBy.getValueType() == MVT::i64)
16464     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16465 
16466   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16467                          ShiftBy);
16468 }
16469 
16470 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16471   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16472     return Value;
16473 
16474   return SDValue();
16475 }
16476 
16477 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16478   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16479     return Value;
16480 
16481   return SDValue();
16482 }
16483 
16484 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16485 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16486 // When C is zero, the equation (addi Z, -C) can be simplified to Z
16487 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16488 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16489                                  const PPCSubtarget &Subtarget) {
16490   if (!Subtarget.isPPC64())
16491     return SDValue();
16492 
16493   SDValue LHS = N->getOperand(0);
16494   SDValue RHS = N->getOperand(1);
16495 
16496   auto isZextOfCompareWithConstant = [](SDValue Op) {
16497     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16498         Op.getValueType() != MVT::i64)
16499       return false;
16500 
16501     SDValue Cmp = Op.getOperand(0);
16502     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16503         Cmp.getOperand(0).getValueType() != MVT::i64)
16504       return false;
16505 
16506     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16507       int64_t NegConstant = 0 - Constant->getSExtValue();
16508       // Due to the limitations of the addi instruction,
16509       // -C is required to be [-32768, 32767].
16510       return isInt<16>(NegConstant);
16511     }
16512 
16513     return false;
16514   };
16515 
16516   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16517   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16518 
16519   // If there is a pattern, canonicalize a zext operand to the RHS.
16520   if (LHSHasPattern && !RHSHasPattern)
16521     std::swap(LHS, RHS);
16522   else if (!LHSHasPattern && !RHSHasPattern)
16523     return SDValue();
16524 
16525   SDLoc DL(N);
16526   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16527   SDValue Cmp = RHS.getOperand(0);
16528   SDValue Z = Cmp.getOperand(0);
16529   auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16530 
16531   assert(Constant && "Constant Should not be a null pointer.");
16532   int64_t NegConstant = 0 - Constant->getSExtValue();
16533 
16534   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16535   default: break;
16536   case ISD::SETNE: {
16537     //                                 when C == 0
16538     //                             --> addze X, (addic Z, -1).carry
16539     //                            /
16540     // add X, (zext(setne Z, C))--
16541     //                            \    when -32768 <= -C <= 32767 && C != 0
16542     //                             --> addze X, (addic (addi Z, -C), -1).carry
16543     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16544                               DAG.getConstant(NegConstant, DL, MVT::i64));
16545     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16546     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16547                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16548     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16549                        SDValue(Addc.getNode(), 1));
16550     }
16551   case ISD::SETEQ: {
16552     //                                 when C == 0
16553     //                             --> addze X, (subfic Z, 0).carry
16554     //                            /
16555     // add X, (zext(sete  Z, C))--
16556     //                            \    when -32768 <= -C <= 32767 && C != 0
16557     //                             --> addze X, (subfic (addi Z, -C), 0).carry
16558     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16559                               DAG.getConstant(NegConstant, DL, MVT::i64));
16560     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16561     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16562                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16563     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16564                        SDValue(Subc.getNode(), 1));
16565     }
16566   }
16567 
16568   return SDValue();
16569 }
16570 
16571 // Transform
16572 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16573 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16574 // In this case both C1 and C2 must be known constants.
16575 // C1+C2 must fit into a 34 bit signed integer.
16576 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16577                                           const PPCSubtarget &Subtarget) {
16578   if (!Subtarget.isUsingPCRelativeCalls())
16579     return SDValue();
16580 
16581   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16582   // If we find that node try to cast the Global Address and the Constant.
16583   SDValue LHS = N->getOperand(0);
16584   SDValue RHS = N->getOperand(1);
16585 
16586   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16587     std::swap(LHS, RHS);
16588 
16589   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16590     return SDValue();
16591 
16592   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16593   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16594   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16595 
16596   // Check that both casts succeeded.
16597   if (!GSDN || !ConstNode)
16598     return SDValue();
16599 
16600   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16601   SDLoc DL(GSDN);
16602 
16603   // The signed int offset needs to fit in 34 bits.
16604   if (!isInt<34>(NewOffset))
16605     return SDValue();
16606 
16607   // The new global address is a copy of the old global address except
16608   // that it has the updated Offset.
16609   SDValue GA =
16610       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16611                                  NewOffset, GSDN->getTargetFlags());
16612   SDValue MatPCRel =
16613       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16614   return MatPCRel;
16615 }
16616 
16617 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16618   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16619     return Value;
16620 
16621   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16622     return Value;
16623 
16624   return SDValue();
16625 }
16626 
16627 // Detect TRUNCATE operations on bitcasts of float128 values.
16628 // What we are looking for here is the situtation where we extract a subset
16629 // of bits from a 128 bit float.
16630 // This can be of two forms:
16631 // 1) BITCAST of f128 feeding TRUNCATE
16632 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16633 // The reason this is required is because we do not have a legal i128 type
16634 // and so we want to prevent having to store the f128 and then reload part
16635 // of it.
16636 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16637                                            DAGCombinerInfo &DCI) const {
16638   // If we are using CRBits then try that first.
16639   if (Subtarget.useCRBits()) {
16640     // Check if CRBits did anything and return that if it did.
16641     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16642       return CRTruncValue;
16643   }
16644 
16645   SDLoc dl(N);
16646   SDValue Op0 = N->getOperand(0);
16647 
16648   // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16649   if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16650     EVT VT = N->getValueType(0);
16651     if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16652       return SDValue();
16653     SDValue Sub = Op0.getOperand(0);
16654     if (Sub.getOpcode() == ISD::SUB) {
16655       SDValue SubOp0 = Sub.getOperand(0);
16656       SDValue SubOp1 = Sub.getOperand(1);
16657       if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16658           (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16659         return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16660                                SubOp1.getOperand(0),
16661                                DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16662       }
16663     }
16664   }
16665 
16666   // Looking for a truncate of i128 to i64.
16667   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16668     return SDValue();
16669 
16670   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16671 
16672   // SRL feeding TRUNCATE.
16673   if (Op0.getOpcode() == ISD::SRL) {
16674     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16675     // The right shift has to be by 64 bits.
16676     if (!ConstNode || ConstNode->getZExtValue() != 64)
16677       return SDValue();
16678 
16679     // Switch the element number to extract.
16680     EltToExtract = EltToExtract ? 0 : 1;
16681     // Update Op0 past the SRL.
16682     Op0 = Op0.getOperand(0);
16683   }
16684 
16685   // BITCAST feeding a TRUNCATE possibly via SRL.
16686   if (Op0.getOpcode() == ISD::BITCAST &&
16687       Op0.getValueType() == MVT::i128 &&
16688       Op0.getOperand(0).getValueType() == MVT::f128) {
16689     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16690     return DCI.DAG.getNode(
16691         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16692         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16693   }
16694   return SDValue();
16695 }
16696 
16697 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16698   SelectionDAG &DAG = DCI.DAG;
16699 
16700   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16701   if (!ConstOpOrElement)
16702     return SDValue();
16703 
16704   // An imul is usually smaller than the alternative sequence for legal type.
16705   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16706       isOperationLegal(ISD::MUL, N->getValueType(0)))
16707     return SDValue();
16708 
16709   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16710     switch (this->Subtarget.getCPUDirective()) {
16711     default:
16712       // TODO: enhance the condition for subtarget before pwr8
16713       return false;
16714     case PPC::DIR_PWR8:
16715       //  type        mul     add    shl
16716       // scalar        4       1      1
16717       // vector        7       2      2
16718       return true;
16719     case PPC::DIR_PWR9:
16720     case PPC::DIR_PWR10:
16721     case PPC::DIR_PWR_FUTURE:
16722       //  type        mul     add    shl
16723       // scalar        5       2      2
16724       // vector        7       2      2
16725 
16726       // The cycle RATIO of related operations are showed as a table above.
16727       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
16728       // scalar and vector type. For 2 instrs patterns, add/sub + shl
16729       // are 4, it is always profitable; but for 3 instrs patterns
16730       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
16731       // So we should only do it for vector type.
16732       return IsAddOne && IsNeg ? VT.isVector() : true;
16733     }
16734   };
16735 
16736   EVT VT = N->getValueType(0);
16737   SDLoc DL(N);
16738 
16739   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16740   bool IsNeg = MulAmt.isNegative();
16741   APInt MulAmtAbs = MulAmt.abs();
16742 
16743   if ((MulAmtAbs - 1).isPowerOf2()) {
16744     // (mul x, 2^N + 1) => (add (shl x, N), x)
16745     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16746 
16747     if (!IsProfitable(IsNeg, true, VT))
16748       return SDValue();
16749 
16750     SDValue Op0 = N->getOperand(0);
16751     SDValue Op1 =
16752         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16753                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16754     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16755 
16756     if (!IsNeg)
16757       return Res;
16758 
16759     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16760   } else if ((MulAmtAbs + 1).isPowerOf2()) {
16761     // (mul x, 2^N - 1) => (sub (shl x, N), x)
16762     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16763 
16764     if (!IsProfitable(IsNeg, false, VT))
16765       return SDValue();
16766 
16767     SDValue Op0 = N->getOperand(0);
16768     SDValue Op1 =
16769         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16770                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16771 
16772     if (!IsNeg)
16773       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16774     else
16775       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16776 
16777   } else {
16778     return SDValue();
16779   }
16780 }
16781 
16782 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16783 // in combiner since we need to check SD flags and other subtarget features.
16784 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16785                                           DAGCombinerInfo &DCI) const {
16786   SDValue N0 = N->getOperand(0);
16787   SDValue N1 = N->getOperand(1);
16788   SDValue N2 = N->getOperand(2);
16789   SDNodeFlags Flags = N->getFlags();
16790   EVT VT = N->getValueType(0);
16791   SelectionDAG &DAG = DCI.DAG;
16792   const TargetOptions &Options = getTargetMachine().Options;
16793   unsigned Opc = N->getOpcode();
16794   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16795   bool LegalOps = !DCI.isBeforeLegalizeOps();
16796   SDLoc Loc(N);
16797 
16798   if (!isOperationLegal(ISD::FMA, VT))
16799     return SDValue();
16800 
16801   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16802   // since (fnmsub a b c)=-0 while c-ab=+0.
16803   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16804     return SDValue();
16805 
16806   // (fma (fneg a) b c) => (fnmsub a b c)
16807   // (fnmsub (fneg a) b c) => (fma a b c)
16808   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16809     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16810 
16811   // (fma a (fneg b) c) => (fnmsub a b c)
16812   // (fnmsub a (fneg b) c) => (fma a b c)
16813   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16814     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16815 
16816   return SDValue();
16817 }
16818 
16819 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16820   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16821   if (!Subtarget.is64BitELFABI())
16822     return false;
16823 
16824   // If not a tail call then no need to proceed.
16825   if (!CI->isTailCall())
16826     return false;
16827 
16828   // If sibling calls have been disabled and tail-calls aren't guaranteed
16829   // there is no reason to duplicate.
16830   auto &TM = getTargetMachine();
16831   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16832     return false;
16833 
16834   // Can't tail call a function called indirectly, or if it has variadic args.
16835   const Function *Callee = CI->getCalledFunction();
16836   if (!Callee || Callee->isVarArg())
16837     return false;
16838 
16839   // Make sure the callee and caller calling conventions are eligible for tco.
16840   const Function *Caller = CI->getParent()->getParent();
16841   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16842                                            CI->getCallingConv()))
16843       return false;
16844 
16845   // If the function is local then we have a good chance at tail-calling it
16846   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16847 }
16848 
16849 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16850   if (!Subtarget.hasVSX())
16851     return false;
16852   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16853     return true;
16854   return VT == MVT::f32 || VT == MVT::f64 ||
16855     VT == MVT::v4f32 || VT == MVT::v2f64;
16856 }
16857 
16858 bool PPCTargetLowering::
16859 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16860   const Value *Mask = AndI.getOperand(1);
16861   // If the mask is suitable for andi. or andis. we should sink the and.
16862   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16863     // Can't handle constants wider than 64-bits.
16864     if (CI->getBitWidth() > 64)
16865       return false;
16866     int64_t ConstVal = CI->getZExtValue();
16867     return isUInt<16>(ConstVal) ||
16868       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16869   }
16870 
16871   // For non-constant masks, we can always use the record-form and.
16872   return true;
16873 }
16874 
16875 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16876 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16877 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16878 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16879 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16880 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16881   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16882   assert(Subtarget.hasP9Altivec() &&
16883          "Only combine this when P9 altivec supported!");
16884   EVT VT = N->getValueType(0);
16885   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16886     return SDValue();
16887 
16888   SelectionDAG &DAG = DCI.DAG;
16889   SDLoc dl(N);
16890   if (N->getOperand(0).getOpcode() == ISD::SUB) {
16891     // Even for signed integers, if it's known to be positive (as signed
16892     // integer) due to zero-extended inputs.
16893     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16894     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16895     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16896          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16897         (SubOpcd1 == ISD::ZERO_EXTEND ||
16898          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16899       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16900                          N->getOperand(0)->getOperand(0),
16901                          N->getOperand(0)->getOperand(1),
16902                          DAG.getTargetConstant(0, dl, MVT::i32));
16903     }
16904 
16905     // For type v4i32, it can be optimized with xvnegsp + vabsduw
16906     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16907         N->getOperand(0).hasOneUse()) {
16908       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16909                          N->getOperand(0)->getOperand(0),
16910                          N->getOperand(0)->getOperand(1),
16911                          DAG.getTargetConstant(1, dl, MVT::i32));
16912     }
16913   }
16914 
16915   return SDValue();
16916 }
16917 
16918 // For type v4i32/v8ii16/v16i8, transform
16919 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16920 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16921 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16922 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16923 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16924                                           DAGCombinerInfo &DCI) const {
16925   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16926   assert(Subtarget.hasP9Altivec() &&
16927          "Only combine this when P9 altivec supported!");
16928 
16929   SelectionDAG &DAG = DCI.DAG;
16930   SDLoc dl(N);
16931   SDValue Cond = N->getOperand(0);
16932   SDValue TrueOpnd = N->getOperand(1);
16933   SDValue FalseOpnd = N->getOperand(2);
16934   EVT VT = N->getOperand(1).getValueType();
16935 
16936   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16937       FalseOpnd.getOpcode() != ISD::SUB)
16938     return SDValue();
16939 
16940   // ABSD only available for type v4i32/v8i16/v16i8
16941   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16942     return SDValue();
16943 
16944   // At least to save one more dependent computation
16945   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16946     return SDValue();
16947 
16948   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16949 
16950   // Can only handle unsigned comparison here
16951   switch (CC) {
16952   default:
16953     return SDValue();
16954   case ISD::SETUGT:
16955   case ISD::SETUGE:
16956     break;
16957   case ISD::SETULT:
16958   case ISD::SETULE:
16959     std::swap(TrueOpnd, FalseOpnd);
16960     break;
16961   }
16962 
16963   SDValue CmpOpnd1 = Cond.getOperand(0);
16964   SDValue CmpOpnd2 = Cond.getOperand(1);
16965 
16966   // SETCC CmpOpnd1 CmpOpnd2 cond
16967   // TrueOpnd = CmpOpnd1 - CmpOpnd2
16968   // FalseOpnd = CmpOpnd2 - CmpOpnd1
16969   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16970       TrueOpnd.getOperand(1) == CmpOpnd2 &&
16971       FalseOpnd.getOperand(0) == CmpOpnd2 &&
16972       FalseOpnd.getOperand(1) == CmpOpnd1) {
16973     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16974                        CmpOpnd1, CmpOpnd2,
16975                        DAG.getTargetConstant(0, dl, MVT::i32));
16976   }
16977 
16978   return SDValue();
16979 }
16980