1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107     cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110     cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113     cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116     cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119     cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122     cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 // TODO: Remove this option once soft fp128 is fully supported.
125 static cl::opt<bool>
126     EnableSoftFP128("enable-soft-fp128",
127                     cl::desc("temp option to enable soft fp128"), cl::Hidden);
128 
129 STATISTIC(NumTailCalls, "Number of tail calls");
130 STATISTIC(NumSiblingCalls, "Number of sibling calls");
131 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocations probed");
133 
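// Forward declarations of file-local helpers defined later in this file.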
134 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135 
136 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137 
138 // FIXME: Remove this once the bug has been fixed!
139 extern cl::opt<bool> ANDIGlueBug;
140 
141 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142                                      const PPCSubtarget &STI)
143     : TargetLowering(TM), Subtarget(STI) {
144   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145   // arguments are at least 4/8 bytes aligned.
146   bool isPPC64 = Subtarget.isPPC64();
147   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148 
149   // Set up the register classes.
150   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151   if (!useSoftFloat()) {
152     if (hasSPE()) {
153       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155     } else {
156       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158     }
159   }
160 
161   // Match BITREVERSE to customized fast code sequence in the td file.
162   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164 
165   // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
166   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167 
168   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169   for (MVT VT : MVT::integer_valuetypes()) {
170     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172   }
173 
174   if (Subtarget.isISA3_0()) {
175     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179   } else {
180     // No extending loads from f16 or HW conversions back and forth.
181     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189   }
190 
191   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192 
193   // PowerPC has pre-increment loads and stores.
194   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
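  // SPE targets do not have the FPR-based update-form (pre-increment)
  // floating-point loads and stores.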
204   if (!Subtarget.hasSPE()) {
205     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209   }
210 
211   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213   for (MVT VT : ScalarIntVTs) {
214     setOperationAction(ISD::ADDC, VT, Legal);
215     setOperationAction(ISD::ADDE, VT, Legal);
216     setOperationAction(ISD::SUBC, VT, Legal);
217     setOperationAction(ISD::SUBE, VT, Legal);
218   }
219 
220   if (Subtarget.useCRBits()) {
221     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222 
223     if (isPPC64 || Subtarget.hasFPCVT()) {
224       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
225       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
226                         isPPC64 ? MVT::i64 : MVT::i32);
227       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
228       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
229                         isPPC64 ? MVT::i64 : MVT::i32);
230 
231       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
232       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
233                          isPPC64 ? MVT::i64 : MVT::i32);
234       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
235       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
236                         isPPC64 ? MVT::i64 : MVT::i32);
237     } else {
238       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
239       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
240       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
241       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
242     }
243 
244     // PowerPC does not support direct load/store of condition registers.
245     setOperationAction(ISD::LOAD, MVT::i1, Custom);
246     setOperationAction(ISD::STORE, MVT::i1, Custom);
247 
248     // FIXME: Remove this once the ANDI glue bug is fixed:
249     if (ANDIGlueBug)
250       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
251 
252     for (MVT VT : MVT::integer_valuetypes()) {
253       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
254       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
255       setTruncStoreAction(VT, MVT::i1, Expand);
256     }
257 
258     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
259   }
260 
261   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
262   // PPC (the libcall is not available).
263   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
264   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
265   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
266   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
267 
268   // We do not currently implement these libm ops for PowerPC.
269   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
270   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
271   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
272   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
273   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
274   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
275 
276   // PowerPC has no SREM/UREM instructions unless we are on P9.
277   // On P9 we may use a hardware instruction to compute the remainder.
278   // When the result of both the remainder and the division is required it is
279   // more efficient to compute the remainder from the result of the division
280   // rather than use the remainder instruction. The instructions are legalized
281   // directly because the DivRemPairsPass performs the transformation at the IR
282   // level.
283   if (Subtarget.isISA3_0()) {
284     setOperationAction(ISD::SREM, MVT::i32, Legal);
285     setOperationAction(ISD::UREM, MVT::i32, Legal);
286     setOperationAction(ISD::SREM, MVT::i64, Legal);
287     setOperationAction(ISD::UREM, MVT::i64, Legal);
288   } else {
289     setOperationAction(ISD::SREM, MVT::i32, Expand);
290     setOperationAction(ISD::UREM, MVT::i32, Expand);
291     setOperationAction(ISD::SREM, MVT::i64, Expand);
292     setOperationAction(ISD::UREM, MVT::i64, Expand);
293   }
294 
295   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
298   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
299   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
300   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
301   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
302   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
304 
305   // Handle constrained floating-point operations for scalar types.
306   // TODO: Handle SPE-specific operations.
307   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
308   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
309   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
310   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
311   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
312   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
313 
314   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
315   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
316   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
317   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
318   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
319   if (Subtarget.hasVSX()) {
320     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
321     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
322   }
323 
324   if (Subtarget.hasFSQRT()) {
325     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
326     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
327   }
328 
329   if (Subtarget.hasFPRND()) {
330     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
331     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
332     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
333     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
334 
335     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
336     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
337     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
338     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
339   }
340 
341   // We don't support sin/cos/fmod/pow; sqrt is handled separately below.
342   setOperationAction(ISD::FSIN , MVT::f64, Expand);
343   setOperationAction(ISD::FCOS , MVT::f64, Expand);
344   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
345   setOperationAction(ISD::FREM , MVT::f64, Expand);
346   setOperationAction(ISD::FPOW , MVT::f64, Expand);
347   setOperationAction(ISD::FSIN , MVT::f32, Expand);
348   setOperationAction(ISD::FCOS , MVT::f32, Expand);
349   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
350   setOperationAction(ISD::FREM , MVT::f32, Expand);
351   setOperationAction(ISD::FPOW , MVT::f32, Expand);
352   if (Subtarget.hasSPE()) {
353     setOperationAction(ISD::FMA  , MVT::f64, Expand);
354     setOperationAction(ISD::FMA  , MVT::f32, Expand);
355   } else {
356     setOperationAction(ISD::FMA  , MVT::f64, Legal);
357     setOperationAction(ISD::FMA  , MVT::f32, Legal);
358   }
359 
360   if (Subtarget.hasSPE())
361     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
362 
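  // Reading the current rounding mode (kept in the FPSCR) needs a custom sequence.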
363   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
364 
365   // If we're enabling GP optimizations, use hardware square root
366   if (!Subtarget.hasFSQRT() &&
367       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
368         Subtarget.hasFRE()))
369     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
370 
371   if (!Subtarget.hasFSQRT() &&
372       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
373         Subtarget.hasFRES()))
374     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
375 
376   if (Subtarget.hasFCPSGN()) {
377     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
378     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
379   } else {
380     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
381     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
382   }
383 
384   if (Subtarget.hasFPRND()) {
385     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
386     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
387     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
388     setOperationAction(ISD::FROUND, MVT::f64, Legal);
389 
390     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
391     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
392     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
393     setOperationAction(ISD::FROUND, MVT::f32, Legal);
394   }
395 
396   // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
397   // xxbrd to speed up scalar BSWAP64.
398   // CTPOP and CTTZ were introduced in P8 and P9, respectively.
399   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
400   if (Subtarget.hasP9Vector())
401     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
402   else
403     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
404   if (Subtarget.isISA3_0()) {
405     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
406     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
407   } else {
408     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
409     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
410   }
411 
412   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
413     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
414     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
415   } else {
416     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
417     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
418   }
419 
420   // PowerPC does not have ROTR
421   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
422   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
423 
424   if (!Subtarget.useCRBits()) {
425     // PowerPC does not have Select
426     setOperationAction(ISD::SELECT, MVT::i32, Expand);
427     setOperationAction(ISD::SELECT, MVT::i64, Expand);
428     setOperationAction(ISD::SELECT, MVT::f32, Expand);
429     setOperationAction(ISD::SELECT, MVT::f64, Expand);
430   }
431 
432   // PowerPC wants to turn select_cc of FP into fsel when possible.
433   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435 
436   // PowerPC wants to optimize integer setcc a bit
437   if (!Subtarget.useCRBits())
438     setOperationAction(ISD::SETCC, MVT::i32, Custom);
439 
440   if (Subtarget.hasFPU()) {
441     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
442     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
443     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
444 
445     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
446     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
447     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
448   }
449 
450   // PowerPC does not have BRCOND which requires SetCC
451   if (!Subtarget.useCRBits())
452     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
453 
454   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
455 
456   if (Subtarget.hasSPE()) {
457     // SPE has built-in conversions
458     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
459     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
460     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
461     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
462     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
463     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
464   } else {
465     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
466     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
467     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
468 
469     // PowerPC does not have [U|S]INT_TO_FP
470     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
471     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
472     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
473     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
474   }
475 
476   if (Subtarget.hasDirectMove() && isPPC64) {
477     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
478     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
479     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
480     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
481     if (TM.Options.UnsafeFPMath) {
482       setOperationAction(ISD::LRINT, MVT::f64, Legal);
483       setOperationAction(ISD::LRINT, MVT::f32, Legal);
484       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
485       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
486       setOperationAction(ISD::LROUND, MVT::f64, Legal);
487       setOperationAction(ISD::LROUND, MVT::f32, Legal);
488       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
489       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
490     }
491   } else {
492     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
493     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
494     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
495     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
496   }
497 
498   // We cannot sextinreg(i1).  Expand to shifts.
499   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
500 
501   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
502   // SjLj exception handling but is a light-weight setjmp/longjmp replacement
503   // used to support continuations, user-level threading, and so on. As a
504   // result, no other SjLj exception interfaces are implemented; please don't
505   // build your own exception handling based on them.
506   // LLVM/Clang supports zero-cost DWARF exception handling.
507   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
508   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
509 
510   // We want to legalize GlobalAddress and ConstantPool nodes into the
511   // appropriate instructions to materialize the address.
512   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
513   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
514   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
515   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
516   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
517   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
518   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
519   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
520   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
521   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
522 
523   // TRAP is legal.
524   setOperationAction(ISD::TRAP, MVT::Other, Legal);
525 
526   // TRAMPOLINE is custom lowered.
527   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
528   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
529 
530   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
531   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
532 
533   if (Subtarget.is64BitELFABI()) {
534     // VAARG always uses double-word chunks, so promote anything smaller.
535     setOperationAction(ISD::VAARG, MVT::i1, Promote);
536     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
537     setOperationAction(ISD::VAARG, MVT::i8, Promote);
538     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
539     setOperationAction(ISD::VAARG, MVT::i16, Promote);
540     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
541     setOperationAction(ISD::VAARG, MVT::i32, Promote);
542     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
543     setOperationAction(ISD::VAARG, MVT::Other, Expand);
544   } else if (Subtarget.is32BitELFABI()) {
545     // VAARG is custom lowered with the 32-bit SVR4 ABI.
546     setOperationAction(ISD::VAARG, MVT::Other, Custom);
547     setOperationAction(ISD::VAARG, MVT::i64, Custom);
548   } else
549     setOperationAction(ISD::VAARG, MVT::Other, Expand);
550 
551   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
552   if (Subtarget.is32BitELFABI())
553     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
554   else
555     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
556 
557   // Use the default implementation.
558   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
559   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
560   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
561   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
562   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
563   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
564   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
565   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
566   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
567 
568   // We want to custom lower some of our intrinsics.
569   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
570 
571   // To handle counter-based loop conditions.
572   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
573 
574   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
575   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
576   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
577   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
578 
579   // Comparisons that require checking two conditions.
580   if (Subtarget.hasSPE()) {
581     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
582     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
583     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
584     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
585   }
586   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
587   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
588   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
589   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
590   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
591   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
592   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
593   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
594   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
595   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
596   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
597   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
598 
599   if (Subtarget.has64BitSupport()) {
600     // They also have instructions for converting between i64 and fp.
601     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
602     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
603     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
604     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
605     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
606     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
607     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
608     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
609     // This is just the low 32 bits of a (signed) fp->i64 conversion.
610     // We cannot do this with Promote because i64 is not a legal type.
611     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
612     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
613 
614     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
615       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
616       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
617     }
618   } else {
619     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
620     if (Subtarget.hasSPE()) {
621       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
622       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
623     } else {
624       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
625       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
626     }
627   }
628 
629   // With the instructions enabled under FPCVT, we can do everything.
630   if (Subtarget.hasFPCVT()) {
631     if (Subtarget.has64BitSupport()) {
632       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
633       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
634       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
635       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
636       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
637       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
638       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
639       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
640     }
641 
642     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
643     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
644     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
645     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
646     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
647     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
648     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
649     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
650   }
651 
652   if (Subtarget.use64BitRegs()) {
653     // 64-bit PowerPC implementations can support i64 types directly
654     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
655     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
656     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
657     // 64-bit PowerPC wants to expand i128 shifts itself.
658     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
659     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
660     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
661   } else {
662     // 32-bit PowerPC wants to expand i64 shifts itself.
663     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
664     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
665     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
666   }
667 
668   // PowerPC has better expansions for funnel shifts than the generic
669   // TargetLowering::expandFunnelShift.
670   if (Subtarget.has64BitSupport()) {
671     setOperationAction(ISD::FSHL, MVT::i64, Custom);
672     setOperationAction(ISD::FSHR, MVT::i64, Custom);
673   }
674   setOperationAction(ISD::FSHL, MVT::i32, Custom);
675   setOperationAction(ISD::FSHR, MVT::i32, Custom);
676 
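  // VSX provides scalar floating-point minimum/maximum with IEEE semantics.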
677   if (Subtarget.hasVSX()) {
678     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
679     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
680     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
681     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
682   }
683 
684   if (Subtarget.hasAltivec()) {
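    // Altivec has saturating add/subtract on byte, halfword, and word elements.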
685     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
686       setOperationAction(ISD::SADDSAT, VT, Legal);
687       setOperationAction(ISD::SSUBSAT, VT, Legal);
688       setOperationAction(ISD::UADDSAT, VT, Legal);
689       setOperationAction(ISD::USUBSAT, VT, Legal);
690     }
691     // First set operation action for all vector types to expand. Then we
692     // will selectively turn on ones that can be effectively codegen'd.
693     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
694       // add/sub are legal for all supported vector VT's.
695       setOperationAction(ISD::ADD, VT, Legal);
696       setOperationAction(ISD::SUB, VT, Legal);
697 
698       // For v2i64, these are only valid with P8Vector. This is corrected after
699       // the loop.
700       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
701         setOperationAction(ISD::SMAX, VT, Legal);
702         setOperationAction(ISD::SMIN, VT, Legal);
703         setOperationAction(ISD::UMAX, VT, Legal);
704         setOperationAction(ISD::UMIN, VT, Legal);
705       }
706       else {
707         setOperationAction(ISD::SMAX, VT, Expand);
708         setOperationAction(ISD::SMIN, VT, Expand);
709         setOperationAction(ISD::UMAX, VT, Expand);
710         setOperationAction(ISD::UMIN, VT, Expand);
711       }
712 
713       if (Subtarget.hasVSX()) {
714         setOperationAction(ISD::FMAXNUM, VT, Legal);
715         setOperationAction(ISD::FMINNUM, VT, Legal);
716       }
717 
718       // Vector instructions introduced in P8
719       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
720         setOperationAction(ISD::CTPOP, VT, Legal);
721         setOperationAction(ISD::CTLZ, VT, Legal);
722       }
723       else {
724         setOperationAction(ISD::CTPOP, VT, Expand);
725         setOperationAction(ISD::CTLZ, VT, Expand);
726       }
727 
728       // Vector instructions introduced in P9
729       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
730         setOperationAction(ISD::CTTZ, VT, Legal);
731       else
732         setOperationAction(ISD::CTTZ, VT, Expand);
733 
734       // We promote all shuffles to v16i8.
735       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
736       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
737 
738       // We promote all non-typed operations to v4i32.
739       setOperationAction(ISD::AND   , VT, Promote);
740       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
741       setOperationAction(ISD::OR    , VT, Promote);
742       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
743       setOperationAction(ISD::XOR   , VT, Promote);
744       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
745       setOperationAction(ISD::LOAD  , VT, Promote);
746       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
747       setOperationAction(ISD::SELECT, VT, Promote);
748       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
749       setOperationAction(ISD::VSELECT, VT, Legal);
750       setOperationAction(ISD::SELECT_CC, VT, Promote);
751       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
752       setOperationAction(ISD::STORE, VT, Promote);
753       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
754 
755       // No other operations are legal.
756       setOperationAction(ISD::MUL , VT, Expand);
757       setOperationAction(ISD::SDIV, VT, Expand);
758       setOperationAction(ISD::SREM, VT, Expand);
759       setOperationAction(ISD::UDIV, VT, Expand);
760       setOperationAction(ISD::UREM, VT, Expand);
761       setOperationAction(ISD::FDIV, VT, Expand);
762       setOperationAction(ISD::FREM, VT, Expand);
763       setOperationAction(ISD::FNEG, VT, Expand);
764       setOperationAction(ISD::FSQRT, VT, Expand);
765       setOperationAction(ISD::FLOG, VT, Expand);
766       setOperationAction(ISD::FLOG10, VT, Expand);
767       setOperationAction(ISD::FLOG2, VT, Expand);
768       setOperationAction(ISD::FEXP, VT, Expand);
769       setOperationAction(ISD::FEXP2, VT, Expand);
770       setOperationAction(ISD::FSIN, VT, Expand);
771       setOperationAction(ISD::FCOS, VT, Expand);
772       setOperationAction(ISD::FABS, VT, Expand);
773       setOperationAction(ISD::FFLOOR, VT, Expand);
774       setOperationAction(ISD::FCEIL,  VT, Expand);
775       setOperationAction(ISD::FTRUNC, VT, Expand);
776       setOperationAction(ISD::FRINT,  VT, Expand);
777       setOperationAction(ISD::FNEARBYINT, VT, Expand);
778       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
779       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
781       setOperationAction(ISD::MULHU, VT, Expand);
782       setOperationAction(ISD::MULHS, VT, Expand);
783       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
784       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785       setOperationAction(ISD::UDIVREM, VT, Expand);
786       setOperationAction(ISD::SDIVREM, VT, Expand);
787       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
788       setOperationAction(ISD::FPOW, VT, Expand);
789       setOperationAction(ISD::BSWAP, VT, Expand);
790       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
791       setOperationAction(ISD::ROTL, VT, Expand);
792       setOperationAction(ISD::ROTR, VT, Expand);
793 
794       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
795         setTruncStoreAction(VT, InnerVT, Expand);
796         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
797         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
798         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
799       }
800     }
801     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
802     if (!Subtarget.hasP8Vector()) {
803       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
804       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
805       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
806       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
807     }
808 
809     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
810     // with merges, splats, etc.
811     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
812 
813     // Vector truncates to sub-word integers that fit in an Altivec/VSX register
814     // are cheap, so handle them before they get expanded to scalars.
815     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
816     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
817     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
818     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
819     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
820 
821     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
822     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
823     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
824     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
825     setOperationAction(ISD::SELECT, MVT::v4i32,
826                        Subtarget.useCRBits() ? Legal : Expand);
827     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
828     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
829     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
830     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
831     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
832     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
833     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
834     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
835     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
836     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
837     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
838     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
839     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
840 
841     // Custom lower ROTL of v1i128 to a VECTOR_SHUFFLE of v16i8.
842     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
843     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
844     if (Subtarget.hasAltivec())
845       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
846         setOperationAction(ISD::ROTL, VT, Legal);
847     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
848     if (Subtarget.hasP8Altivec())
849       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
850 
851     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
852     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
853     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
854     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
855 
856     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
857     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
858 
859     if (Subtarget.hasVSX()) {
860       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
861       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
862     }
863 
864     if (Subtarget.hasP8Altivec())
865       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
866     else
867       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
868 
869     if (Subtarget.isISA3_1()) {
870       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
871       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
872       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
873       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
874       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
875       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
876       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
877       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
878       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
879       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
880       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
881       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
882       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
883       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
884       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
885       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
886       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
887       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
888     }
889 
890     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
891     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
892 
893     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
894     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
895 
896     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
897     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
898     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
899     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
900 
901     // Altivec does not contain unordered floating-point compare instructions
902     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
903     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
904     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
905     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
906 
907     if (Subtarget.hasVSX()) {
908       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
909       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
910       if (Subtarget.hasP8Vector()) {
911         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
912         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
913       }
914       if (Subtarget.hasDirectMove() && isPPC64) {
915         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
916         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
917         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
918         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
919         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
920         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
921         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
922         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
923       }
924       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
925 
926       // The nearbyint variants are not allowed to raise the inexact exception
927       // so we can only code-gen them with unsafe math.
928       if (TM.Options.UnsafeFPMath) {
929         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
930         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
931       }
932 
933       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
934       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
935       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
936       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
937       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
938       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
939       setOperationAction(ISD::FROUND, MVT::f64, Legal);
940       setOperationAction(ISD::FRINT, MVT::f64, Legal);
941 
942       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
943       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
944       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
945       setOperationAction(ISD::FROUND, MVT::f32, Legal);
946       setOperationAction(ISD::FRINT, MVT::f32, Legal);
947 
948       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
949       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
950 
951       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
952       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
953 
954       // Share the Altivec comparison restrictions.
955       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
956       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
957       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
958       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
959 
960       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
961       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
962 
963       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
964 
965       if (Subtarget.hasP8Vector())
966         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
967 
968       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
969 
970       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
971       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
972       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
973 
974       if (Subtarget.hasP8Altivec()) {
975         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
976         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
977         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
978 
979         // 128-bit shifts can be accomplished via 3 instructions for SHL and
980         // SRL, but not for SRA because of the instructions available:
981         // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
982         // worth doing.
983         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
984         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
985         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
986 
987         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
988       }
989       else {
990         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
991         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
992         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
993 
994         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
995 
996         // VSX v2i64 only supports non-arithmetic operations.
997         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
998         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
999       }
1000 
1001       if (Subtarget.isISA3_1())
1002         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1003       else
1004         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1005 
1006       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1007       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1008       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1009       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1010 
1011       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1012 
1013       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1014       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1015       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1016       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1017       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1018       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1019       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1020       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1021 
1022       // Custom handling for partial vectors of integers converted to
1023       // floating point. We already have optimal handling for v2i32 through
1024       // the DAG combine, so those aren't necessary.
1025       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1026       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1027       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1028       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1029       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1030       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1031       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1032       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1033       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1034       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1035       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1036       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1037       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1038       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1039       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1040       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1041 
1042       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1043       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1044       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1045       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1046       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1047       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1048 
1049       if (Subtarget.hasDirectMove())
1050         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1051       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1052 
1053       // Handle constrained floating-point operations for vector types.
1054       // The predicate is `hasVSX` because Altivec instructions do not raise
1055       // floating-point exceptions, whereas VSX vector instructions do.
1056       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1057       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1058       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1059       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1060       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1061       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1062       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1063       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1064       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1065       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1066       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1067       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1068       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1069 
1070       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1071       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1072       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1073       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1074       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1075       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1076       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1077       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1078       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1079       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1080       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1081       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1082       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1083 
1084       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1085     }
1086 
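    // With P8 Altivec, 64-bit and 128-bit integer vectors live in the Altivec registers.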
1087     if (Subtarget.hasP8Altivec()) {
1088       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1089       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1090     }
1091 
1092     if (Subtarget.hasP9Vector()) {
1093       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1094       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1095 
1096       // 128-bit shifts can be accomplished via 3 instructions for SHL and
1097       // SRL, but not for SRA because of the instructions available:
1098       // VS{RL} and VS{RL}O.
1099       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1100       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1101       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1102 
1103       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1104       setOperationAction(ISD::FADD, MVT::f128, Legal);
1105       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1106       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1107       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1108       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1109       // No extending loads to f128 on PPC.
1110       for (MVT FPT : MVT::fp_valuetypes())
1111         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1112       setOperationAction(ISD::FMA, MVT::f128, Legal);
1113       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1114       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1115       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1116       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1117       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1118       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1119 
1120       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1121       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1122       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1123       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1124       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1125       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1126 
1127       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1128       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1129       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1130       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1131       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1132       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1133       // No implementation for these ops for PowerPC.
1134       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1135       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1136       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1137       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1138       setOperationAction(ISD::FREM, MVT::f128, Expand);
1139 
1140       // Handle constrained floating-point operations for fp128.
1141       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1142       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1143       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1144       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1145       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1146       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1147       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1148       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1149       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1150       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1151       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1152       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1153       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1154       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1155       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1156       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1157       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1158       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1159       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1160       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1161     } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
1162       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1163 
1164       for (MVT FPT : MVT::fp_valuetypes())
1165         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1166 
1167       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1168       setOperationAction(ISD::STORE, MVT::f128, Promote);
1169 
1170       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1171       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1172 
1173       setOperationAction(ISD::FADD, MVT::f128, Expand);
1174       setOperationAction(ISD::FSUB, MVT::f128, Expand);
1175       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1176       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1177       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1178       setOperationAction(ISD::FABS, MVT::f128, Expand);
1179       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1180       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1181       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1182       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1183       setOperationAction(ISD::FREM, MVT::f128, Expand);
1184       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1185       setOperationAction(ISD::FMA, MVT::f128, Expand);
1186       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1187     }
1188 
1189     if (Subtarget.hasP9Altivec()) {
1190       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1191       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1192 
1193       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1194       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1195       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1196       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1197       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1198       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1199       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1200     }
1201   }
1202 
1203   if (Subtarget.pairedVectorMemops()) {
1204     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1205     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1206     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1207   }
1208   if (Subtarget.hasMMA()) {
1209     addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1210     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1211     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1212     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1213   }
1214 
1215   if (Subtarget.has64BitSupport())
1216     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1217 
1218   if (Subtarget.isISA3_1())
1219     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1220 
1221   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1222 
1223   if (!isPPC64) {
1224     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1225     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1226   }
1227 
1228   setBooleanContents(ZeroOrOneBooleanContent);
1229 
1230   if (Subtarget.hasAltivec()) {
1231     // Altivec instructions set fields to all zeros or all ones.
1232     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1233   }
1234 
1235   if (!isPPC64) {
1236     // These libcalls are not available in 32-bit.
1237     setLibcallName(RTLIB::SHL_I128, nullptr);
1238     setLibcallName(RTLIB::SRL_I128, nullptr);
1239     setLibcallName(RTLIB::SRA_I128, nullptr);
1240   }
1241 
1242   if (!isPPC64)
1243     setMaxAtomicSizeInBitsSupported(32);
1244 
1245   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1246 
1247   // We have target-specific dag combine patterns for the following nodes:
1248   setTargetDAGCombine(ISD::ADD);
1249   setTargetDAGCombine(ISD::SHL);
1250   setTargetDAGCombine(ISD::SRA);
1251   setTargetDAGCombine(ISD::SRL);
1252   setTargetDAGCombine(ISD::MUL);
1253   setTargetDAGCombine(ISD::FMA);
1254   setTargetDAGCombine(ISD::SINT_TO_FP);
1255   setTargetDAGCombine(ISD::BUILD_VECTOR);
1256   if (Subtarget.hasFPCVT())
1257     setTargetDAGCombine(ISD::UINT_TO_FP);
1258   setTargetDAGCombine(ISD::LOAD);
1259   setTargetDAGCombine(ISD::STORE);
1260   setTargetDAGCombine(ISD::BR_CC);
1261   if (Subtarget.useCRBits())
1262     setTargetDAGCombine(ISD::BRCOND);
1263   setTargetDAGCombine(ISD::BSWAP);
1264   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1265   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1266   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1267 
1268   setTargetDAGCombine(ISD::SIGN_EXTEND);
1269   setTargetDAGCombine(ISD::ZERO_EXTEND);
1270   setTargetDAGCombine(ISD::ANY_EXTEND);
1271 
1272   setTargetDAGCombine(ISD::TRUNCATE);
1273   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1274 
1275 
1276   if (Subtarget.useCRBits()) {
1277     setTargetDAGCombine(ISD::TRUNCATE);
1278     setTargetDAGCombine(ISD::SETCC);
1279     setTargetDAGCombine(ISD::SELECT_CC);
1280   }
1281 
1282   if (Subtarget.hasP9Altivec()) {
1283     setTargetDAGCombine(ISD::ABS);
1284     setTargetDAGCombine(ISD::VSELECT);
1285   }
1286 
1287   setLibcallName(RTLIB::LOG_F128, "logf128");
1288   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1289   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1290   setLibcallName(RTLIB::EXP_F128, "expf128");
1291   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1292   setLibcallName(RTLIB::SIN_F128, "sinf128");
1293   setLibcallName(RTLIB::COS_F128, "cosf128");
1294   setLibcallName(RTLIB::POW_F128, "powf128");
1295   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1296   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1297   setLibcallName(RTLIB::POWI_F128, "__powikf2");
1298   setLibcallName(RTLIB::REM_F128, "fmodf128");
1299 
1300   // With 32 condition bits, we don't need to sink (and duplicate) compares
1301   // aggressively in CodeGenPrep.
1302   if (Subtarget.useCRBits()) {
1303     setHasMultipleConditionRegisters();
1304     setJumpIsExpensive();
1305   }
1306 
1307   setMinFunctionAlignment(Align(4));
1308 
1309   switch (Subtarget.getCPUDirective()) {
1310   default: break;
1311   case PPC::DIR_970:
1312   case PPC::DIR_A2:
1313   case PPC::DIR_E500:
1314   case PPC::DIR_E500mc:
1315   case PPC::DIR_E5500:
1316   case PPC::DIR_PWR4:
1317   case PPC::DIR_PWR5:
1318   case PPC::DIR_PWR5X:
1319   case PPC::DIR_PWR6:
1320   case PPC::DIR_PWR6X:
1321   case PPC::DIR_PWR7:
1322   case PPC::DIR_PWR8:
1323   case PPC::DIR_PWR9:
1324   case PPC::DIR_PWR10:
1325   case PPC::DIR_PWR_FUTURE:
1326     setPrefLoopAlignment(Align(16));
1327     setPrefFunctionAlignment(Align(16));
1328     break;
1329   }
1330 
1331   if (Subtarget.enableMachineScheduler())
1332     setSchedulingPreference(Sched::Source);
1333   else
1334     setSchedulingPreference(Sched::Hybrid);
1335 
1336   computeRegisterProperties(STI.getRegisterInfo());
1337 
1338   // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1340   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1341       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1342     MaxStoresPerMemset = 32;
1343     MaxStoresPerMemsetOptSize = 16;
1344     MaxStoresPerMemcpy = 32;
1345     MaxStoresPerMemcpyOptSize = 8;
1346     MaxStoresPerMemmove = 32;
1347     MaxStoresPerMemmoveOptSize = 8;
1348   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1349     // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
1351     // over one hundred cycles.
1352     MaxStoresPerMemset = 128;
1353     MaxStoresPerMemcpy = 128;
1354     MaxStoresPerMemmove = 128;
1355     MaxLoadsPerMemcmp = 128;
1356   } else {
1357     MaxLoadsPerMemcmp = 8;
1358     MaxLoadsPerMemcmpOptSize = 4;
1359   }
1360 
1361   IsStrictFPEnabled = true;
1362 
1363   // Let the subtarget (CPU) decide if a predictable select is more expensive
1364   // than the corresponding branch. This information is used in CGP to decide
1365   // when to convert selects into branches.
1366   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1367 }
1368 
1369 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1370 /// the desired ByVal argument alignment.
1371 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1372   if (MaxAlign == MaxMaxAlign)
1373     return;
1374   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1375     if (MaxMaxAlign >= 32 &&
1376         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1377       MaxAlign = Align(32);
1378     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1379              MaxAlign < 16)
1380       MaxAlign = Align(16);
1381   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1382     Align EltAlign;
1383     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1384     if (EltAlign > MaxAlign)
1385       MaxAlign = EltAlign;
1386   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1387     for (auto *EltTy : STy->elements()) {
1388       Align EltAlign;
1389       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1390       if (EltAlign > MaxAlign)
1391         MaxAlign = EltAlign;
1392       if (MaxAlign == MaxMaxAlign)
1393         break;
1394     }
1395   }
1396 }
1397 
1398 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1399 /// function arguments in the caller parameter area.
1400 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1401                                                   const DataLayout &DL) const {
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest are passed on an 8-byte boundary on PPC64 and a 4-byte boundary
  // on PPC32.
1404   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1405   if (Subtarget.hasAltivec())
1406     getMaxByValAlign(Ty, Alignment, Align(16));
1407   return Alignment.value();
1408 }
1409 
1410 bool PPCTargetLowering::useSoftFloat() const {
1411   return Subtarget.useSoftFloat();
1412 }
1413 
1414 bool PPCTargetLowering::hasSPE() const {
1415   return Subtarget.hasSPE();
1416 }
1417 
1418 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1419   return VT.isScalarInteger();
1420 }
1421 
1422 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1423   switch ((PPCISD::NodeType)Opcode) {
1424   case PPCISD::FIRST_NUMBER:    break;
1425   case PPCISD::FSEL:            return "PPCISD::FSEL";
1426   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1427   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1428   case PPCISD::FCFID:           return "PPCISD::FCFID";
1429   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1430   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1431   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1432   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1433   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1434   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1435   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1436   case PPCISD::FP_TO_UINT_IN_VSR:
1437                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1438   case PPCISD::FP_TO_SINT_IN_VSR:
1439                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1440   case PPCISD::FRE:             return "PPCISD::FRE";
1441   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1442   case PPCISD::FTSQRT:
1443     return "PPCISD::FTSQRT";
1444   case PPCISD::FSQRT:
1445     return "PPCISD::FSQRT";
1446   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1447   case PPCISD::VPERM:           return "PPCISD::VPERM";
1448   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1449   case PPCISD::XXSPLTI_SP_TO_DP:
1450     return "PPCISD::XXSPLTI_SP_TO_DP";
1451   case PPCISD::XXSPLTI32DX:
1452     return "PPCISD::XXSPLTI32DX";
1453   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1454   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1455   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1456   case PPCISD::CMPB:            return "PPCISD::CMPB";
1457   case PPCISD::Hi:              return "PPCISD::Hi";
1458   case PPCISD::Lo:              return "PPCISD::Lo";
1459   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1460   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1461   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1462   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1463   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1464   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1465   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1466   case PPCISD::SRL:             return "PPCISD::SRL";
1467   case PPCISD::SRA:             return "PPCISD::SRA";
1468   case PPCISD::SHL:             return "PPCISD::SHL";
1469   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1470   case PPCISD::CALL:            return "PPCISD::CALL";
1471   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1472   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1473   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1474   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1475   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1476   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1477   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1478   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1479   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1480   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1481   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1482   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1483   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1484   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1485   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1486   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1487     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1488   case PPCISD::ANDI_rec_1_EQ_BIT:
1489     return "PPCISD::ANDI_rec_1_EQ_BIT";
1490   case PPCISD::ANDI_rec_1_GT_BIT:
1491     return "PPCISD::ANDI_rec_1_GT_BIT";
1492   case PPCISD::VCMP:            return "PPCISD::VCMP";
1493   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1494   case PPCISD::LBRX:            return "PPCISD::LBRX";
1495   case PPCISD::STBRX:           return "PPCISD::STBRX";
1496   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1497   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1498   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1499   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1500   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1501   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1502   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1503   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1504   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1505   case PPCISD::ST_VSR_SCAL_INT:
1506                                 return "PPCISD::ST_VSR_SCAL_INT";
1507   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1508   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1509   case PPCISD::BDZ:             return "PPCISD::BDZ";
1510   case PPCISD::MFFS:            return "PPCISD::MFFS";
1511   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1512   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1513   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1514   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1515   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1516   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1517   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1518   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1519   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1520   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1521   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1522   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1523   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1524   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1525   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1526   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1527   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1528   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1529   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1530   case PPCISD::PADDI_DTPREL:
1531     return "PPCISD::PADDI_DTPREL";
1532   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1533   case PPCISD::SC:              return "PPCISD::SC";
1534   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1535   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1536   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1537   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1538   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1539   case PPCISD::VABSD:           return "PPCISD::VABSD";
1540   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1541   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1542   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1543   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1544   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1545   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1546   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1547   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1548     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1549   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1550     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1551   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1552   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1553   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1554   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1555   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1556   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1557   case PPCISD::STRICT_FADDRTZ:
1558     return "PPCISD::STRICT_FADDRTZ";
1559   case PPCISD::STRICT_FCTIDZ:
1560     return "PPCISD::STRICT_FCTIDZ";
1561   case PPCISD::STRICT_FCTIWZ:
1562     return "PPCISD::STRICT_FCTIWZ";
1563   case PPCISD::STRICT_FCTIDUZ:
1564     return "PPCISD::STRICT_FCTIDUZ";
1565   case PPCISD::STRICT_FCTIWUZ:
1566     return "PPCISD::STRICT_FCTIWUZ";
1567   case PPCISD::STRICT_FCFID:
1568     return "PPCISD::STRICT_FCFID";
1569   case PPCISD::STRICT_FCFIDU:
1570     return "PPCISD::STRICT_FCFIDU";
1571   case PPCISD::STRICT_FCFIDS:
1572     return "PPCISD::STRICT_FCFIDS";
1573   case PPCISD::STRICT_FCFIDUS:
1574     return "PPCISD::STRICT_FCFIDUS";
1575   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1576   }
1577   return nullptr;
1578 }
1579 
1580 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1581                                           EVT VT) const {
1582   if (!VT.isVector())
1583     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1584 
1585   return VT.changeVectorElementTypeToInteger();
1586 }
1587 
1588 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1589   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1590   return true;
1591 }
1592 
1593 //===----------------------------------------------------------------------===//
1594 // Node matching predicates, for use by the tblgen matching code.
1595 //===----------------------------------------------------------------------===//
1596 
1597 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1598 static bool isFloatingPointZero(SDValue Op) {
1599   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1600     return CFP->getValueAPF().isZero();
1601   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1602     // Maybe this has already been legalized into the constant pool?
1603     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1604       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1605         return CFP->getValueAPF().isZero();
1606   }
1607   return false;
1608 }
1609 
1610 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1611 /// true if Op is undef or if it matches the specified value.
1612 static bool isConstantOrUndef(int Op, int Val) {
1613   return Op < 0 || Op == Val;
1614 }
1615 
1616 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1617 /// VPKUHUM instruction.
1618 /// The ShuffleKind distinguishes between big-endian operations with
1619 /// two different inputs (0), either-endian operations with two identical
1620 /// inputs (1), and little-endian operations with two different inputs (2).
1621 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
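/// For example (illustrative), a big-endian shuffle of two different inputs
/// (ShuffleKind 0) matches when the mask is <1,3,5,...,31>, i.e. the
/// odd-numbered bytes of the concatenated inputs.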
1622 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1623                                SelectionDAG &DAG) {
1624   bool IsLE = DAG.getDataLayout().isLittleEndian();
1625   if (ShuffleKind == 0) {
1626     if (IsLE)
1627       return false;
1628     for (unsigned i = 0; i != 16; ++i)
1629       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1630         return false;
1631   } else if (ShuffleKind == 2) {
1632     if (!IsLE)
1633       return false;
1634     for (unsigned i = 0; i != 16; ++i)
1635       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1636         return false;
1637   } else if (ShuffleKind == 1) {
1638     unsigned j = IsLE ? 0 : 1;
1639     for (unsigned i = 0; i != 8; ++i)
1640       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1641           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1642         return false;
1643   }
1644   return true;
1645 }
1646 
1647 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1648 /// VPKUWUM instruction.
1649 /// The ShuffleKind distinguishes between big-endian operations with
1650 /// two different inputs (0), either-endian operations with two identical
1651 /// inputs (1), and little-endian operations with two different inputs (2).
1652 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1653 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1654                                SelectionDAG &DAG) {
1655   bool IsLE = DAG.getDataLayout().isLittleEndian();
1656   if (ShuffleKind == 0) {
1657     if (IsLE)
1658       return false;
1659     for (unsigned i = 0; i != 16; i += 2)
1660       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1661           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1662         return false;
1663   } else if (ShuffleKind == 2) {
1664     if (!IsLE)
1665       return false;
1666     for (unsigned i = 0; i != 16; i += 2)
1667       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1668           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1669         return false;
1670   } else if (ShuffleKind == 1) {
1671     unsigned j = IsLE ? 0 : 2;
1672     for (unsigned i = 0; i != 8; i += 2)
1673       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1674           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1675           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1676           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1677         return false;
1678   }
1679   return true;
1680 }
1681 
1682 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1683 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1684 /// current subtarget.
1685 ///
1686 /// The ShuffleKind distinguishes between big-endian operations with
1687 /// two different inputs (0), either-endian operations with two identical
1688 /// inputs (1), and little-endian operations with two different inputs (2).
1689 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1690 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1691                                SelectionDAG &DAG) {
1692   const PPCSubtarget& Subtarget =
1693       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1694   if (!Subtarget.hasP8Vector())
1695     return false;
1696 
1697   bool IsLE = DAG.getDataLayout().isLittleEndian();
1698   if (ShuffleKind == 0) {
1699     if (IsLE)
1700       return false;
1701     for (unsigned i = 0; i != 16; i += 4)
1702       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1703           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1704           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1705           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1706         return false;
1707   } else if (ShuffleKind == 2) {
1708     if (!IsLE)
1709       return false;
1710     for (unsigned i = 0; i != 16; i += 4)
1711       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1712           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1713           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1714           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1715         return false;
1716   } else if (ShuffleKind == 1) {
1717     unsigned j = IsLE ? 0 : 4;
1718     for (unsigned i = 0; i != 8; i += 4)
1719       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1720           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1721           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1722           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1723           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1724           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1725           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1726           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1727         return false;
1728   }
1729   return true;
1730 }
1731 
1732 /// isVMerge - Common function, used to match vmrg* shuffles.
1733 ///
1734 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1735                      unsigned LHSStart, unsigned RHSStart) {
1736   if (N->getValueType(0) != MVT::v16i8)
1737     return false;
1738   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1739          "Unsupported merge size!");
1740 
1741   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1742     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1743       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1744                              LHSStart+j+i*UnitSize) ||
1745           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1746                              RHSStart+j+i*UnitSize))
1747         return false;
1748     }
1749   return true;
1750 }
1751 
1752 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1753 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1754 /// The ShuffleKind distinguishes between big-endian merges with two
1755 /// different inputs (0), either-endian merges with two identical inputs (1),
1756 /// and little-endian merges with two different inputs (2).  For the latter,
1757 /// the input operands are swapped (see PPCInstrAltivec.td).
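/// For example (illustrative), with UnitSize 4 on a big-endian target and two
/// different inputs (ShuffleKind 0), the expected mask is
/// <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>.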
1758 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1759                              unsigned ShuffleKind, SelectionDAG &DAG) {
1760   if (DAG.getDataLayout().isLittleEndian()) {
1761     if (ShuffleKind == 1) // unary
1762       return isVMerge(N, UnitSize, 0, 0);
1763     else if (ShuffleKind == 2) // swapped
1764       return isVMerge(N, UnitSize, 0, 16);
1765     else
1766       return false;
1767   } else {
1768     if (ShuffleKind == 1) // unary
1769       return isVMerge(N, UnitSize, 8, 8);
1770     else if (ShuffleKind == 0) // normal
1771       return isVMerge(N, UnitSize, 8, 24);
1772     else
1773       return false;
1774   }
1775 }
1776 
1777 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1778 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1779 /// The ShuffleKind distinguishes between big-endian merges with two
1780 /// different inputs (0), either-endian merges with two identical inputs (1),
1781 /// and little-endian merges with two different inputs (2).  For the latter,
1782 /// the input operands are swapped (see PPCInstrAltivec.td).
1783 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1784                              unsigned ShuffleKind, SelectionDAG &DAG) {
1785   if (DAG.getDataLayout().isLittleEndian()) {
1786     if (ShuffleKind == 1) // unary
1787       return isVMerge(N, UnitSize, 8, 8);
1788     else if (ShuffleKind == 2) // swapped
1789       return isVMerge(N, UnitSize, 8, 24);
1790     else
1791       return false;
1792   } else {
1793     if (ShuffleKind == 1) // unary
1794       return isVMerge(N, UnitSize, 0, 0);
1795     else if (ShuffleKind == 0) // normal
1796       return isVMerge(N, UnitSize, 0, 16);
1797     else
1798       return false;
1799   }
1800 }
1801 
1802 /**
1803  * Common function used to match vmrgew and vmrgow shuffles
1804  *
1805  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
 * machine.
1808  *   - Little Endian:
1809  *     - Use offset of 0 to check for odd elements
1810  *     - Use offset of 4 to check for even elements
1811  *   - Big Endian:
1812  *     - Use offset of 0 to check for even elements
1813  *     - Use offset of 4 to check for odd elements
1814  * A detailed description of the vector element ordering for little endian and
1815  * big endian can be found at
1816  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1817  * Targeting your applications - what little endian and big endian IBM XL C/C++
1818  * compiler differences mean to you
1819  *
1820  * The mask to the shuffle vector instruction specifies the indices of the
1821  * elements from the two input vectors to place in the result. The elements are
1822  * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, so each vector contains 16 elements of 8 bits
 * each. More information on the shufflevector instruction can be found in the
 * LLVM Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1827  *
1828  * The RHSStartValue indicates whether the same input vectors are used (unary)
1829  * or two different input vectors are used, based on the following:
1830  *   - If the instruction uses the same vector for both inputs, the range of the
1831  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1832  *     be 0.
1833  *   - If the instruction has two different vectors then the range of the
1834  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1835  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1836  *     to 31 specify elements in the second vector).
1837  *
1838  * \param[in] N The shuffle vector SD Node to analyze
1839  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1840  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1841  * vector to the shuffle_vector instruction
1842  * \return true iff this shuffle vector represents an even or odd word merge
1843  */
1844 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1845                      unsigned RHSStartValue) {
1846   if (N->getValueType(0) != MVT::v16i8)
1847     return false;
1848 
1849   for (unsigned i = 0; i < 2; ++i)
1850     for (unsigned j = 0; j < 4; ++j)
1851       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1852                              i*RHSStartValue+j+IndexOffset) ||
1853           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1854                              i*RHSStartValue+j+IndexOffset+8))
1855         return false;
1856   return true;
1857 }
1858 
1859 /**
1860  * Determine if the specified shuffle mask is suitable for the vmrgew or
1861  * vmrgow instructions.
1862  *
1863  * \param[in] N The shuffle vector SD Node to analyze
1864  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1865  * \param[in] ShuffleKind Identify the type of merge:
1866  *   - 0 = big-endian merge with two different inputs;
1867  *   - 1 = either-endian merge with two identical inputs;
1868  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1869  *     little-endian merges).
1870  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow merge
1872  */
1873 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1874                               unsigned ShuffleKind, SelectionDAG &DAG) {
1875   if (DAG.getDataLayout().isLittleEndian()) {
1876     unsigned indexOffset = CheckEven ? 4 : 0;
1877     if (ShuffleKind == 1) // Unary
1878       return isVMerge(N, indexOffset, 0);
1879     else if (ShuffleKind == 2) // swapped
1880       return isVMerge(N, indexOffset, 16);
1881     else
1882       return false;
1883   }
1884   else {
1885     unsigned indexOffset = CheckEven ? 0 : 4;
1886     if (ShuffleKind == 1) // Unary
1887       return isVMerge(N, indexOffset, 0);
1888     else if (ShuffleKind == 0) // Normal
1889       return isVMerge(N, indexOffset, 16);
1890     else
1891       return false;
1892   }
1893   return false;
1894 }
1895 
1896 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1897 /// amount, otherwise return -1.
1898 /// The ShuffleKind distinguishes between big-endian operations with two
1899 /// different inputs (0), either-endian operations with two identical inputs
1900 /// (1), and little-endian operations with two different inputs (2).  For the
1901 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
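/// For example (illustrative), the big-endian two-input mask <3,4,5,...,18>
/// is a vsldoi by 3 bytes, so this returns 3.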
1902 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1903                              SelectionDAG &DAG) {
1904   if (N->getValueType(0) != MVT::v16i8)
1905     return -1;
1906 
1907   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1908 
1909   // Find the first non-undef value in the shuffle mask.
1910   unsigned i;
1911   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1912     /*search*/;
1913 
1914   if (i == 16) return -1;  // all undef.
1915 
1916   // Otherwise, check to see if the rest of the elements are consecutively
1917   // numbered from this value.
1918   unsigned ShiftAmt = SVOp->getMaskElt(i);
1919   if (ShiftAmt < i) return -1;
1920 
1921   ShiftAmt -= i;
1922   bool isLE = DAG.getDataLayout().isLittleEndian();
1923 
1924   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1925     // Check the rest of the elements to see if they are consecutive.
1926     for (++i; i != 16; ++i)
1927       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1928         return -1;
1929   } else if (ShuffleKind == 1) {
1930     // Check the rest of the elements to see if they are consecutive.
1931     for (++i; i != 16; ++i)
1932       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1933         return -1;
1934   } else
1935     return -1;
1936 
1937   if (isLE)
1938     ShiftAmt = 16 - ShiftAmt;
1939 
1940   return ShiftAmt;
1941 }
1942 
1943 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1944 /// specifies a splat of a single element that is suitable for input to
1945 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
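/// For example (illustrative), with EltSize 4 the mask
/// <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7> is a splat of word element 1.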
1946 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1947   assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1948          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1949 
1950   // The consecutive indices need to specify an element, not part of two
1951   // different elements.  So abandon ship early if this isn't the case.
1952   if (N->getMaskElt(0) % EltSize != 0)
1953     return false;
1954 
1955   // This is a splat operation if each element of the permute is the same, and
1956   // if the value doesn't reference the second vector.
1957   unsigned ElementBase = N->getMaskElt(0);
1958 
1959   // FIXME: Handle UNDEF elements too!
1960   if (ElementBase >= 16)
1961     return false;
1962 
1963   // Check that the indices are consecutive, in the case of a multi-byte element
1964   // splatted with a v16i8 mask.
1965   for (unsigned i = 1; i != EltSize; ++i)
1966     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1967       return false;
1968 
1969   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1970     if (N->getMaskElt(i) < 0) continue;
1971     for (unsigned j = 0; j != EltSize; ++j)
1972       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1973         return false;
1974   }
1975   return true;
1976 }
1977 
1978 /// Check that the mask is shuffling N byte elements. Within each N byte
1979 /// element of the mask, the indices could be either in increasing or
1980 /// decreasing order as long as they are consecutive.
1981 /// \param[in] N the shuffle vector SD Node to analyze
1982 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1983 /// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta between consecutive indices within an N byte
/// element: 1 if the mask is in increasing order, -1 if it is in decreasing
/// order.
1986 /// \return true iff the mask is shuffling N byte elements.
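/// For example (illustrative), with Width 4 and StepLen 1 the mask
/// <4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11> qualifies, whereas StepLen -1
/// requires each word's indices to run backwards, e.g. <3,2,1,0, ...>.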
1987 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1988                                    int StepLen) {
1989   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1990          "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
1992 
1993   unsigned NumOfElem = 16 / Width;
1994   unsigned MaskVal[16]; //  Width is never greater than 16
1995   for (unsigned i = 0; i < NumOfElem; ++i) {
1996     MaskVal[0] = N->getMaskElt(i * Width);
1997     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1998       return false;
1999     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2000       return false;
2001     }
2002 
2003     for (unsigned int j = 1; j < Width; ++j) {
2004       MaskVal[j] = N->getMaskElt(i * Width + j);
2005       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2006         return false;
2007       }
2008     }
2009   }
2010 
2011   return true;
2012 }
2013 
2014 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2015                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2016   if (!isNByteElemShuffleMask(N, 4, 1))
2017     return false;
2018 
2019   // Now we look at mask elements 0,4,8,12
2020   unsigned M0 = N->getMaskElt(0) / 4;
2021   unsigned M1 = N->getMaskElt(4) / 4;
2022   unsigned M2 = N->getMaskElt(8) / 4;
2023   unsigned M3 = N->getMaskElt(12) / 4;
2024   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2025   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2026 
2027   // Below, let H and L be arbitrary elements of the shuffle mask
2028   // where H is in the range [4,7] and L is in the range [0,3].
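  // For example (illustrative), on little-endian the word mask <7,1,2,3>
  // matches the first pattern below and yields ShiftElts = 3,
  // InsertAtByte = 12, Swap = false.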
2029   // H, 1, 2, 3 or L, 5, 6, 7
2030   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2031       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2032     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2033     InsertAtByte = IsLE ? 12 : 0;
2034     Swap = M0 < 4;
2035     return true;
2036   }
2037   // 0, H, 2, 3 or 4, L, 6, 7
2038   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2039       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2040     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2041     InsertAtByte = IsLE ? 8 : 4;
2042     Swap = M1 < 4;
2043     return true;
2044   }
2045   // 0, 1, H, 3 or 4, 5, L, 7
2046   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2047       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2048     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2049     InsertAtByte = IsLE ? 4 : 8;
2050     Swap = M2 < 4;
2051     return true;
2052   }
2053   // 0, 1, 2, H or 4, 5, 6, L
2054   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2055       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2056     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2057     InsertAtByte = IsLE ? 0 : 12;
2058     Swap = M3 < 4;
2059     return true;
2060   }
2061 
2062   // If both vector operands for the shuffle are the same vector, the mask will
2063   // contain only elements from the first one and the second one will be undef.
2064   if (N->getOperand(1).isUndef()) {
2065     ShiftElts = 0;
2066     Swap = true;
2067     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2068     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2069       InsertAtByte = IsLE ? 12 : 0;
2070       return true;
2071     }
2072     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2073       InsertAtByte = IsLE ? 8 : 4;
2074       return true;
2075     }
2076     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2077       InsertAtByte = IsLE ? 4 : 8;
2078       return true;
2079     }
2080     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2081       InsertAtByte = IsLE ? 0 : 12;
2082       return true;
2083     }
2084   }
2085 
2086   return false;
2087 }
2088 
2089 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2090                                bool &Swap, bool IsLE) {
2091   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2092   // Ensure each byte index of the word is consecutive.
2093   if (!isNByteElemShuffleMask(N, 4, 1))
2094     return false;
2095 
2096   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2097   unsigned M0 = N->getMaskElt(0) / 4;
2098   unsigned M1 = N->getMaskElt(4) / 4;
2099   unsigned M2 = N->getMaskElt(8) / 4;
2100   unsigned M3 = N->getMaskElt(12) / 4;
2101 
2102   // If both vector operands for the shuffle are the same vector, the mask will
2103   // contain only elements from the first one and the second one will be undef.
2104   if (N->getOperand(1).isUndef()) {
2105     assert(M0 < 4 && "Indexing into an undef vector?");
2106     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2107       return false;
2108 
2109     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2110     Swap = false;
2111     return true;
2112   }
2113 
2114   // Ensure each word index of the ShuffleVector Mask is consecutive.
2115   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2116     return false;
2117 
2118   if (IsLE) {
2119     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2120       // Input vectors don't need to be swapped if the leading element
2121       // of the result is one of the 3 left elements of the second vector
2122       // (or if there is no shift to be done at all).
2123       Swap = false;
2124       ShiftElts = (8 - M0) % 8;
2125     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2126       // Input vectors need to be swapped if the leading element
2127       // of the result is one of the 3 left elements of the first vector
2128       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2129       Swap = true;
2130       ShiftElts = (4 - M0) % 4;
2131     }
2132 
2133     return true;
2134   } else {                                          // BE
2135     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2136       // Input vectors don't need to be swapped if the leading element
2137       // of the result is one of the 4 elements of the first vector.
2138       Swap = false;
2139       ShiftElts = M0;
2140     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2141       // Input vectors need to be swapped if the leading element
2142       // of the result is one of the 4 elements of the right vector.
2143       Swap = true;
2144       ShiftElts = M0 - 4;
2145     }
2146 
2147     return true;
2148   }
2149 }
2150 
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2152   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2153 
2154   if (!isNByteElemShuffleMask(N, Width, -1))
2155     return false;
2156 
2157   for (int i = 0; i < 16; i += Width)
2158     if (N->getMaskElt(i) != i + Width - 1)
2159       return false;
2160 
2161   return true;
2162 }
2163 
2164 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2165   return isXXBRShuffleMaskHelper(N, 2);
2166 }
2167 
2168 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2169   return isXXBRShuffleMaskHelper(N, 4);
2170 }
2171 
2172 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2173   return isXXBRShuffleMaskHelper(N, 8);
2174 }
2175 
2176 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2177   return isXXBRShuffleMaskHelper(N, 16);
2178 }
2179 
2180 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2181 /// if the inputs to the instruction should be swapped and set \p DM to the
2182 /// value for the immediate.
2183 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2184 /// AND element 0 of the result comes from the first input (LE) or second input
2185 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2186 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2187 /// mask.
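/// For example (illustrative), on a big-endian target the byte mask
/// <0,1,...,7, 16,17,...,23> selects doubleword 0 of each input, giving
/// DM = 0 with Swap = false.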
2188 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2189                                bool &Swap, bool IsLE) {
2190   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2191 
2192   // Ensure each byte index of the double word is consecutive.
2193   if (!isNByteElemShuffleMask(N, 8, 1))
2194     return false;
2195 
2196   unsigned M0 = N->getMaskElt(0) / 8;
2197   unsigned M1 = N->getMaskElt(8) / 8;
2198   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2199 
2200   // If both vector operands for the shuffle are the same vector, the mask will
2201   // contain only elements from the first one and the second one will be undef.
2202   if (N->getOperand(1).isUndef()) {
2203     if ((M0 | M1) < 2) {
2204       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2205       Swap = false;
2206       return true;
2207     } else
2208       return false;
2209   }
2210 
2211   if (IsLE) {
2212     if (M0 > 1 && M1 < 2) {
2213       Swap = false;
2214     } else if (M0 < 2 && M1 > 1) {
2215       M0 = (M0 + 2) % 4;
2216       M1 = (M1 + 2) % 4;
2217       Swap = true;
2218     } else
2219       return false;
2220 
2221     // Note: if control flow comes here that means Swap is already set above
2222     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2223     return true;
2224   } else { // BE
2225     if (M0 < 2 && M1 > 1) {
2226       Swap = false;
2227     } else if (M0 > 1 && M1 < 2) {
2228       M0 = (M0 + 2) % 4;
2229       M1 = (M1 + 2) % 4;
2230       Swap = true;
2231     } else
2232       return false;
2233 
2234     // Note: if control flow comes here that means Swap is already set above
2235     DM = (M0 << 1) + (M1 & 1);
2236     return true;
2237   }
2238 }
2239 
2240 
2241 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2242 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2243 /// elements are counted from the left of the vector register).
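/// For example (illustrative), a splat of word element 1 (EltSize 4, mask
/// element 0 equal to 4) yields index 1 on big-endian and 2 on little-endian.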
2244 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2245                                          SelectionDAG &DAG) {
2246   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2247   assert(isSplatShuffleMask(SVOp, EltSize));
2248   if (DAG.getDataLayout().isLittleEndian())
2249     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2250   else
2251     return SVOp->getMaskElt(0) / EltSize;
2252 }
2253 
2254 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2255 /// by using a vspltis[bhw] instruction of the specified element size, return
2256 /// the constant being splatted.  The ByteSize field indicates the number of
2257 /// bytes of each element [124] -> [bhw].
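/// For example (illustrative), a v16i8 build_vector of sixteen copies of the
/// constant 3 queried with ByteSize 1 returns a target constant of 3, which
/// can be materialized with vspltisb 3.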
2258 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2259   SDValue OpVal(nullptr, 0);
2260 
2261   // If ByteSize of the splat is bigger than the element size of the
2262   // build_vector, then we have a case where we are checking for a splat where
2263   // multiple elements of the buildvector are folded together into a single
2264   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2265   unsigned EltSize = 16/N->getNumOperands();
2266   if (EltSize < ByteSize) {
2267     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2268     SDValue UniquedVals[4];
2269     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2270 
2271     // See if all of the elements in the buildvector agree across.
2272     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2273       if (N->getOperand(i).isUndef()) continue;
2274       // If the element isn't a constant, bail fully out.
2275       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2276 
2277       if (!UniquedVals[i&(Multiple-1)].getNode())
2278         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2279       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2280         return SDValue();  // no match.
2281     }
2282 
2283     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2284     // either constant or undef values that are identical for each chunk.  See
2285     // if these chunks can form into a larger vspltis*.
2286 
2287     // Check to see if all of the leading entries are either 0 or -1.  If
2288     // neither, then this won't fit into the immediate field.
2289     bool LeadingZero = true;
2290     bool LeadingOnes = true;
2291     for (unsigned i = 0; i != Multiple-1; ++i) {
2292       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2293 
2294       LeadingZero &= isNullConstant(UniquedVals[i]);
2295       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2296     }
2297     // Finally, check the least significant entry.
2298     if (LeadingZero) {
2299       if (!UniquedVals[Multiple-1].getNode())
2300         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2301       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2302       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2303         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2304     }
2305     if (LeadingOnes) {
2306       if (!UniquedVals[Multiple-1].getNode())
2307         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2308       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2309       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2310         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2311     }
2312 
2313     return SDValue();
2314   }
2315 
2316   // Check to see if this buildvec has a single non-undef value in its elements.
2317   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2318     if (N->getOperand(i).isUndef()) continue;
2319     if (!OpVal.getNode())
2320       OpVal = N->getOperand(i);
2321     else if (OpVal != N->getOperand(i))
2322       return SDValue();
2323   }
2324 
2325   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2326 
2327   unsigned ValSizeInBytes = EltSize;
2328   uint64_t Value = 0;
2329   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2330     Value = CN->getZExtValue();
2331   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2332     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2333     Value = FloatToBits(CN->getValueAPF().convertToFloat());
2334   }
2335 
2336   // If the splat value is larger than the element value, then we can never do
2337   // this splat.  The only case that we could fit the replicated bits into our
2338   // immediate field for would be zero, and we prefer to use vxor for it.
2339   if (ValSizeInBytes < ByteSize) return SDValue();
2340 
2341   // If the element value is larger than the splat value, check if it consists
2342   // of a repeated bit pattern of size ByteSize.
2343   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2344     return SDValue();
2345 
2346   // Properly sign extend the value.
2347   int MaskVal = SignExtend32(Value, ByteSize * 8);
2348 
2349   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2350   if (MaskVal == 0) return SDValue();
2351 
2352   // Finally, if this value fits in a 5 bit sext field, return it
2353   if (SignExtend32<5>(MaskVal) == MaskVal)
2354     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2355   return SDValue();
2356 }
2357 
2358 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2359 /// amount, otherwise return -1.
2360 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2361   EVT VT = N->getValueType(0);
2362   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2363     return -1;
2364 
2365   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2366 
2367   // Find the first non-undef value in the shuffle mask.
2368   unsigned i;
2369   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2370     /*search*/;
2371 
2372   if (i == 4) return -1;  // all undef.
2373 
2374   // Otherwise, check to see if the rest of the elements are consecutively
2375   // numbered from this value.
2376   unsigned ShiftAmt = SVOp->getMaskElt(i);
2377   if (ShiftAmt < i) return -1;
2378   ShiftAmt -= i;
2379 
2380   // Check the rest of the elements to see if they are consecutive.
2381   for (++i; i != 4; ++i)
2382     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2383       return -1;
2384 
2385   return ShiftAmt;
2386 }
2387 
2388 //===----------------------------------------------------------------------===//
2389 //  Addressing Mode Selection
2390 //===----------------------------------------------------------------------===//
2391 
2392 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2393 /// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and sets Imm
/// to the immediate value.
2396 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2397   if (!isa<ConstantSDNode>(N))
2398     return false;
2399 
2400   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2401   if (N->getValueType(0) == MVT::i32)
2402     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2403   else
2404     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2405 }
2406 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2407   return isIntS16Immediate(Op.getNode(), Imm);
2408 }
2409 
2410 
2411 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2412 /// be represented as an indexed [r+r] operation.
2413 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2414                                                SDValue &Index,
2415                                                SelectionDAG &DAG) const {
2416   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2417       UI != E; ++UI) {
2418     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2419       if (Memop->getMemoryVT() == MVT::f64) {
2420           Base = N.getOperand(0);
2421           Index = N.getOperand(1);
2422           return true;
2423       }
2424     }
2425   }
2426   return false;
2427 }
2428 
/// isIntS34Immediate - This method tests if the value of the given node can be
/// accurately represented as a sign extension from a 34-bit value.  If so,
/// this returns true and sets Imm to the immediate value.
2432 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2433   if (!isa<ConstantSDNode>(N))
2434     return false;
2435 
2436   Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2437   return isInt<34>(Imm);
2438 }
2439 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2440   return isIntS34Immediate(Op.getNode(), Imm);
2441 }
2442 
/// SelectAddressRegReg - Given the specified address, check to see if it
2444 /// can be represented as an indexed [r+r] operation.  Returns false if it
2445 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2446 /// non-zero and N can be represented by a base register plus a signed 16-bit
2447 /// displacement, make a more precise judgement by checking (displacement % \p
2448 /// EncodingAlignment).
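///
/// For example, with a 4-byte EncodingAlignment, (add %base, 24) is rejected
/// here (the offset can be encoded as a D-form displacement), while
/// (add %r1, %r2) is accepted and selected as an X-form [r+r] access.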
2449 bool PPCTargetLowering::SelectAddressRegReg(
2450     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2451     MaybeAlign EncodingAlignment) const {
2452   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2453   // a [pc+imm].
2454   if (SelectAddressPCRel(N, Base))
2455     return false;
2456 
2457   int16_t Imm = 0;
2458   if (N.getOpcode() == ISD::ADD) {
    // Is there an SPE f64 load/store using this address? SPE load/store can
    // only handle 8-bit offsets, not 16-bit ones, so prefer the [r+r] form in
    // that case.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
      return true;
2463     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2464         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2465       return false; // r+i
2466     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2467       return false;    // r+i
2468 
2469     Base = N.getOperand(0);
2470     Index = N.getOperand(1);
2471     return true;
2472   } else if (N.getOpcode() == ISD::OR) {
2473     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2474         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // This is r+i; let the [r+i] form handle it if it can.
2476 
2477     // If this is an or of disjoint bitfields, we can codegen this as an add
2478     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2479     // disjoint.
2480     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2481 
2482     if (LHSKnown.Zero.getBoolValue()) {
2483       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2484       // If all of the bits are known zero on the LHS or RHS, the add won't
2485       // carry.
2486       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2487         Base = N.getOperand(0);
2488         Index = N.getOperand(1);
2489         return true;
2490       }
2491     }
2492   }
2493 
2494   return false;
2495 }
2496 
2497 // If we happen to be doing an i64 load or store into a stack slot that has
2498 // less than a 4-byte alignment, then the frame-index elimination may need to
2499 // use an indexed load or store instruction (because the offset may not be a
2500 // multiple of 4). The extra register needed to hold the offset comes from the
2501 // register scavenger, and it is possible that the scavenger will need to use
2502 // an emergency spill slot. As a result, we need to make sure that a spill slot
2503 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2504 // stack slot.
2505 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2506   // FIXME: This does not handle the LWA case.
2507   if (VT != MVT::i64)
2508     return;
2509 
2510   // NOTE: We'll exclude negative FIs here, which come from argument
2511   // lowering, because there are no known test cases triggering this problem
2512   // using packed structures (or similar). We can remove this exclusion if
2513   // we find such a test case. The reason why this is so test-case driven is
2514   // because this entire 'fixup' is only to prevent crashes (from the
2515   // register scavenger) on not-really-valid inputs. For example, if we have:
2516   //   %a = alloca i1
2517   //   %b = bitcast i1* %a to i64*
  //   store i64 %val, i64* %b
2519   // then the store should really be marked as 'align 1', but is not. If it
2520   // were marked as 'align 1' then the indexed form would have been
2521   // instruction-selected initially, and the problem this 'fixup' is preventing
2522   // won't happen regardless.
2523   if (FrameIdx < 0)
2524     return;
2525 
2526   MachineFunction &MF = DAG.getMachineFunction();
2527   MachineFrameInfo &MFI = MF.getFrameInfo();
2528 
2529   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2530     return;
2531 
2532   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2533   FuncInfo->setHasNonRISpills();
2534 }
2535 
2536 /// Returns true if the address N can be represented by a base register plus
2537 /// a signed 16-bit displacement [r+imm], and if it is not better
2538 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2539 /// displacements that are multiples of that value.
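///
/// For example, (add %base, 64) is matched here as Base = %base, Disp = 64,
/// so it can be selected as a D-form access such as "lwz rD, 64(rA)".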
2540 bool PPCTargetLowering::SelectAddressRegImm(
2541     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2542     MaybeAlign EncodingAlignment) const {
2543   // FIXME dl should come from parent load or store, not from address
2544   SDLoc dl(N);
2545 
2546   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2547   // a [pc+imm].
2548   if (SelectAddressPCRel(N, Base))
2549     return false;
2550 
2551   // If this can be more profitably realized as r+r, fail.
2552   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2553     return false;
2554 
2555   if (N.getOpcode() == ISD::ADD) {
2556     int16_t imm = 0;
2557     if (isIntS16Immediate(N.getOperand(1), imm) &&
2558         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2559       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2560       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2561         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2562         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2563       } else {
2564         Base = N.getOperand(0);
2565       }
2566       return true; // [r+i]
2567     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2568       // Match LOAD (ADD (X, Lo(G))).
2569       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2570              && "Cannot handle constant offsets yet!");
2571       Disp = N.getOperand(1).getOperand(0);  // The global address.
2572       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2573              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2574              Disp.getOpcode() == ISD::TargetConstantPool ||
2575              Disp.getOpcode() == ISD::TargetJumpTable);
2576       Base = N.getOperand(0);
2577       return true;  // [&g+r]
2578     }
2579   } else if (N.getOpcode() == ISD::OR) {
2580     int16_t imm = 0;
2581     if (isIntS16Immediate(N.getOperand(1), imm) &&
2582         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2583       // If this is an or of disjoint bitfields, we can codegen this as an add
2584       // (for better address arithmetic) if the LHS and RHS of the OR are
2585       // provably disjoint.
2586       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2587 
2588       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2589         // If all of the bits are known zero on the LHS or RHS, the add won't
2590         // carry.
2591         if (FrameIndexSDNode *FI =
2592               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2593           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2594           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2595         } else {
2596           Base = N.getOperand(0);
2597         }
2598         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2599         return true;
2600       }
2601     }
2602   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2603     // Loading from a constant address.
2604 
2605     // If this address fits entirely in a 16-bit sext immediate field, codegen
2606     // this as "d, 0"
2607     int16_t Imm;
2608     if (isIntS16Immediate(CN, Imm) &&
2609         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2610       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2611       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2612                              CN->getValueType(0));
2613       return true;
2614     }
2615 
2616     // Handle 32-bit sext immediates with LIS + addr mode.
2617     if ((CN->getValueType(0) == MVT::i32 ||
2618          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2619         (!EncodingAlignment ||
2620          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2621       int Addr = (int)CN->getZExtValue();
2622 
2623       // Otherwise, break this down into an LIS + disp.
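      // The displacement is the sign-extended low 16 bits of the address, and
      // the LIS immediate is the remaining high part, adjusted so that
      // (Base << 16) + sext(Disp) reproduces the original address.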
2624       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2625 
2626       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2627                                    MVT::i32);
2628       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2629       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2630       return true;
2631     }
2632   }
2633 
2634   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2635   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2636     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2637     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2638   } else
2639     Base = N;
2640   return true;      // [r+0]
2641 }
2642 
2643 /// Similar to the 16-bit case but for instructions that take a 34-bit
2644 /// displacement field (prefixed loads/stores).
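///
/// For example, (add %base, 100000) can be matched here as Base = %base,
/// Disp = 100000, suitable for a prefixed access such as "plwz".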
2645 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2646                                               SDValue &Base,
2647                                               SelectionDAG &DAG) const {
2648   // Only on 64-bit targets.
2649   if (N.getValueType() != MVT::i64)
2650     return false;
2651 
2652   SDLoc dl(N);
2653   int64_t Imm = 0;
2654 
2655   if (N.getOpcode() == ISD::ADD) {
2656     if (!isIntS34Immediate(N.getOperand(1), Imm))
2657       return false;
2658     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2659     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2660       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2661     else
2662       Base = N.getOperand(0);
2663     return true;
2664   }
2665 
2666   if (N.getOpcode() == ISD::OR) {
2667     if (!isIntS34Immediate(N.getOperand(1), Imm))
2668       return false;
2669     // If this is an or of disjoint bitfields, we can codegen this as an add
2670     // (for better address arithmetic) if the LHS and RHS of the OR are
2671     // provably disjoint.
2672     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2673     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2674       return false;
2675     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2676       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2677     else
2678       Base = N.getOperand(0);
2679     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2680     return true;
2681   }
2682 
2683   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2684     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2685     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2686     return true;
2687   }
2688 
2689   return false;
2690 }
2691 
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2693 /// represented as an indexed [r+r] operation.
2694 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2695                                                 SDValue &Index,
2696                                                 SelectionDAG &DAG) const {
2697   // Check to see if we can easily represent this as an [r+r] address.  This
2698   // will fail if it thinks that the address is more profitably represented as
2699   // reg+imm, e.g. where imm = 0.
2700   if (SelectAddressRegReg(N, Base, Index, DAG))
2701     return true;
2702 
  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add.  However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register.  We therefore only fold the add into the [r+r] form when
  // it is not an add of a value and a 16-bit signed constant where both
  // operands have a single use.
2708   int16_t imm = 0;
2709   if (N.getOpcode() == ISD::ADD &&
2710       (!isIntS16Immediate(N.getOperand(1), imm) ||
2711        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2712     Base = N.getOperand(0);
2713     Index = N.getOperand(1);
2714     return true;
2715   }
2716 
2717   // Otherwise, do it the hard way, using R0 as the base register.
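  // (When RA is r0 in an indexed access it is read as the constant 0, so the
  // effective address is simply the value of Index.)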
2718   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2719                          N.getValueType());
2720   Index = N;
2721   return true;
2722 }
2723 
2724 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2725   Ty *PCRelCand = dyn_cast<Ty>(N);
2726   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2727 }
2728 
2729 /// Returns true if this address is a PC Relative address.
2730 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2731 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2732 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2733   // This is a materialize PC Relative node. Always select this as PC Relative.
2734   Base = N;
2735   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2736     return true;
2737   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2738       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2739       isValidPCRelNode<JumpTableSDNode>(N) ||
2740       isValidPCRelNode<BlockAddressSDNode>(N))
2741     return true;
2742   return false;
2743 }
2744 
2745 /// Returns true if we should use a direct load into vector instruction
2746 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2747 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2748 
  // If the loaded value has any uses other than scalar_to_vector, then we
  // should keep it as a scalar load -> direct move pattern to prevent
  // multiple loads.
2752   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2753   if (!LD)
2754     return false;
2755 
2756   EVT MemVT = LD->getMemoryVT();
2757   if (!MemVT.isSimple())
2758     return false;
2759   switch(MemVT.getSimpleVT().SimpleTy) {
2760   case MVT::i64:
2761     break;
2762   case MVT::i32:
2763     if (!ST.hasP8Vector())
2764       return false;
2765     break;
2766   case MVT::i16:
2767   case MVT::i8:
2768     if (!ST.hasP9Vector())
2769       return false;
2770     break;
2771   default:
2772     return false;
2773   }
2774 
2775   SDValue LoadedVal(N, 0);
2776   if (!LoadedVal.hasOneUse())
2777     return false;
2778 
2779   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2780        UI != UE; ++UI)
2781     if (UI.getUse().get().getResNo() == 0 &&
2782         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2783         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2784       return false;
2785 
2786   return true;
2787 }
2788 
/// getPreIndexedAddressParts - Returns true, and sets the base pointer,
/// offset, and addressing mode by reference, if the node's address can be
/// legally represented as a pre-indexed load / store address.
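///
/// For example, a load whose address is (add %base, 16) may be converted into
/// a pre-increment form such as "lwzu rD, 16(rA)", which also writes the
/// updated address back into rA.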
2792 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2793                                                   SDValue &Offset,
2794                                                   ISD::MemIndexedMode &AM,
2795                                                   SelectionDAG &DAG) const {
2796   if (DisablePPCPreinc) return false;
2797 
2798   bool isLoad = true;
2799   SDValue Ptr;
2800   EVT VT;
2801   unsigned Alignment;
2802   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2803     Ptr = LD->getBasePtr();
2804     VT = LD->getMemoryVT();
2805     Alignment = LD->getAlignment();
2806   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2807     Ptr = ST->getBasePtr();
2808     VT  = ST->getMemoryVT();
2809     Alignment = ST->getAlignment();
2810     isLoad = false;
2811   } else
2812     return false;
2813 
  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions, because we can fold these into a more efficient instruction
  // instead (such as LXSD).
2817   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2818     return false;
2819   }
2820 
2821   // PowerPC doesn't have preinc load/store instructions for vectors
2822   if (VT.isVector())
2823     return false;
2824 
2825   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2826     // Common code will reject creating a pre-inc form if the base pointer
2827     // is a frame index, or if N is a store and the base pointer is either
2828     // the same as or a predecessor of the value being stored.  Check for
2829     // those situations here, and try with swapped Base/Offset instead.
2830     bool Swap = false;
2831 
2832     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2833       Swap = true;
2834     else if (!isLoad) {
2835       SDValue Val = cast<StoreSDNode>(N)->getValue();
2836       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2837         Swap = true;
2838     }
2839 
2840     if (Swap)
2841       std::swap(Base, Offset);
2842 
2843     AM = ISD::PRE_INC;
2844     return true;
2845   }
2846 
2847   // LDU/STU can only handle immediates that are a multiple of 4.
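  // (The DS instruction format used by LD/STD and their update forms only
  // encodes the high 14 bits of the 16-bit displacement, so the low two bits
  // must be zero.)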
2848   if (VT != MVT::i64) {
2849     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2850       return false;
2851   } else {
2852     // LDU/STU need an address with at least 4-byte alignment.
2853     if (Alignment < 4)
2854       return false;
2855 
2856     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2857       return false;
2858   }
2859 
2860   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2861     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2862     // sext i32 to i64 when addr mode is r+i.
2863     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2864         LD->getExtensionType() == ISD::SEXTLOAD &&
2865         isa<ConstantSDNode>(Offset))
2866       return false;
2867   }
2868 
2869   AM = ISD::PRE_INC;
2870   return true;
2871 }
2872 
2873 //===----------------------------------------------------------------------===//
2874 //  LowerOperation implementation
2875 //===----------------------------------------------------------------------===//
2876 
/// Set HiOpFlags and LoOpFlags to the target MO flags used when referencing
/// labels, adding the PIC flag when generating position-independent code.
2879 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2880                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2881                                const GlobalValue *GV = nullptr) {
2882   HiOpFlags = PPCII::MO_HA;
2883   LoOpFlags = PPCII::MO_LO;
2884 
2885   // Don't use the pic base if not in PIC relocation model.
2886   if (IsPIC) {
2887     HiOpFlags |= PPCII::MO_PIC_FLAG;
2888     LoOpFlags |= PPCII::MO_PIC_FLAG;
2889   }
2890 }
2891 
2892 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2893                              SelectionDAG &DAG) {
2894   SDLoc DL(HiPart);
2895   EVT PtrVT = HiPart.getValueType();
2896   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2897 
2898   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2899   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2900 
2901   // With PIC, the first instruction is actually "GR+hi(&G)".
2902   if (isPIC)
2903     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2904                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2905 
2906   // Generate non-pic code that has direct accesses to the constant pool.
2907   // The address of the global is just (hi(&g)+lo(&g)).
2908   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2909 }
2910 
2911 static void setUsesTOCBasePtr(MachineFunction &MF) {
2912   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2913   FuncInfo->setUsesTOCBasePtr();
2914 }
2915 
2916 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2917   setUsesTOCBasePtr(DAG.getMachineFunction());
2918 }
2919 
2920 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2921                                        SDValue GA) const {
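  // Emit a TOC_ENTRY load of GA relative to the TOC pointer: X2 on 64-bit
  // targets, R2 on 32-bit AIX, or the 32-bit ELF PIC base (GlobalBaseReg)
  // otherwise.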
2922   const bool Is64Bit = Subtarget.isPPC64();
2923   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2924   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2925                         : Subtarget.isAIXABI()
2926                               ? DAG.getRegister(PPC::R2, VT)
2927                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2928   SDValue Ops[] = { GA, Reg };
2929   return DAG.getMemIntrinsicNode(
2930       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2931       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2932       MachineMemOperand::MOLoad);
2933 }
2934 
2935 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2936                                              SelectionDAG &DAG) const {
2937   EVT PtrVT = Op.getValueType();
2938   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2939   const Constant *C = CP->getConstVal();
2940 
2941   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the constant pool entry is stored in the TOC.
2943   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2944     if (Subtarget.isUsingPCRelativeCalls()) {
2945       SDLoc DL(CP);
2946       EVT Ty = getPointerTy(DAG.getDataLayout());
2947       SDValue ConstPool = DAG.getTargetConstantPool(
2948           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2949       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2950     }
2951     setUsesTOCBasePtr(DAG);
2952     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2953     return getTOCEntry(DAG, SDLoc(CP), GA);
2954   }
2955 
2956   unsigned MOHiFlag, MOLoFlag;
2957   bool IsPIC = isPositionIndependent();
2958   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2959 
2960   if (IsPIC && Subtarget.isSVR4ABI()) {
2961     SDValue GA =
2962         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2963     return getTOCEntry(DAG, SDLoc(CP), GA);
2964   }
2965 
2966   SDValue CPIHi =
2967       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2968   SDValue CPILo =
2969       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2970   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2971 }
2972 
2973 // For 64-bit PowerPC, prefer the more compact relative encodings.
2974 // This trades 32 bits per jump table entry for one or two instructions
// at the jump site.
2976 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2977   if (isJumpTableRelative())
2978     return MachineJumpTableInfo::EK_LabelDifference32;
2979 
2980   return TargetLowering::getJumpTableEncoding();
2981 }
2982 
2983 bool PPCTargetLowering::isJumpTableRelative() const {
2984   if (UseAbsoluteJumpTables)
2985     return false;
2986   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2987     return true;
2988   return TargetLowering::isJumpTableRelative();
2989 }
2990 
2991 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2992                                                     SelectionDAG &DAG) const {
2993   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2994     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2995 
2996   switch (getTargetMachine().getCodeModel()) {
2997   case CodeModel::Small:
2998   case CodeModel::Medium:
2999     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3000   default:
3001     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3002                        getPointerTy(DAG.getDataLayout()));
3003   }
3004 }
3005 
3006 const MCExpr *
3007 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3008                                                 unsigned JTI,
3009                                                 MCContext &Ctx) const {
3010   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3011     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3012 
3013   switch (getTargetMachine().getCodeModel()) {
3014   case CodeModel::Small:
3015   case CodeModel::Medium:
3016     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3017   default:
3018     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3019   }
3020 }
3021 
3022 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3023   EVT PtrVT = Op.getValueType();
3024   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3025 
3026   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3027   if (Subtarget.isUsingPCRelativeCalls()) {
3028     SDLoc DL(JT);
3029     EVT Ty = getPointerTy(DAG.getDataLayout());
3030     SDValue GA =
3031         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3032     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3033     return MatAddr;
3034   }
3035 
3036   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the jump table is stored in the TOC.
3038   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3039     setUsesTOCBasePtr(DAG);
3040     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3041     return getTOCEntry(DAG, SDLoc(JT), GA);
3042   }
3043 
3044   unsigned MOHiFlag, MOLoFlag;
3045   bool IsPIC = isPositionIndependent();
3046   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3047 
3048   if (IsPIC && Subtarget.isSVR4ABI()) {
3049     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3050                                         PPCII::MO_PIC_FLAG);
3051     return getTOCEntry(DAG, SDLoc(GA), GA);
3052   }
3053 
3054   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3055   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3056   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3057 }
3058 
3059 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3060                                              SelectionDAG &DAG) const {
3061   EVT PtrVT = Op.getValueType();
3062   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3063   const BlockAddress *BA = BASDN->getBlockAddress();
3064 
3065   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3066   if (Subtarget.isUsingPCRelativeCalls()) {
3067     SDLoc DL(BASDN);
3068     EVT Ty = getPointerTy(DAG.getDataLayout());
3069     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3070                                            PPCII::MO_PCREL_FLAG);
3071     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3072     return MatAddr;
3073   }
3074 
3075   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3076   // The actual BlockAddress is stored in the TOC.
3077   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3078     setUsesTOCBasePtr(DAG);
3079     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3080     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3081   }
3082 
3083   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3084   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3085     return getTOCEntry(
3086         DAG, SDLoc(BASDN),
3087         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3088 
3089   unsigned MOHiFlag, MOLoFlag;
3090   bool IsPIC = isPositionIndependent();
3091   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3092   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3093   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3094   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3095 }
3096 
3097 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3098                                               SelectionDAG &DAG) const {
3099   // FIXME: TLS addresses currently use medium model code sequences,
3100   // which is the most useful form.  Eventually support for small and
3101   // large models could be added if users need it, at the cost of
3102   // additional complexity.
3103   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3104   if (DAG.getTarget().useEmulatedTLS())
3105     return LowerToTLSEmulatedModel(GA, DAG);
3106 
3107   SDLoc dl(GA);
3108   const GlobalValue *GV = GA->getGlobal();
3109   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3110   bool is64bit = Subtarget.isPPC64();
3111   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3112   PICLevel::Level picLevel = M->getPICLevel();
3113 
3114   const TargetMachine &TM = getTargetMachine();
3115   TLSModel::Model Model = TM.getTLSModel(GV);
3116 
3117   if (Model == TLSModel::LocalExec) {
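    // Local-exec: the variable's offset from the thread pointer is known at
    // link time, so the address is just the thread pointer (X13 on 64-bit,
    // R2 on 32-bit) plus a @tprel relocation against the symbol.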
3118     if (Subtarget.isUsingPCRelativeCalls()) {
3119       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3120       SDValue TGA = DAG.getTargetGlobalAddress(
3121           GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3122       SDValue MatAddr =
3123           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3124       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3125     }
3126 
3127     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3128                                                PPCII::MO_TPREL_HA);
3129     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3130                                                PPCII::MO_TPREL_LO);
3131     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3132                              : DAG.getRegister(PPC::R2, MVT::i32);
3133 
3134     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3135     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3136   }
3137 
3138   if (Model == TLSModel::InitialExec) {
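    // Initial-exec: the thread-pointer-relative offset of the variable is
    // loaded from the GOT/TOC (LD_GOT_TPREL_L, or a PC-relative load), then
    // added to the thread pointer via ADD_TLS.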
3139     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3140     SDValue TGA = DAG.getTargetGlobalAddress(
3141         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3142     SDValue TGATLS = DAG.getTargetGlobalAddress(
3143         GV, dl, PtrVT, 0,
3144         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3145     SDValue TPOffset;
3146     if (IsPCRel) {
3147       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3148       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3149                              MachinePointerInfo());
3150     } else {
3151       SDValue GOTPtr;
3152       if (is64bit) {
3153         setUsesTOCBasePtr(DAG);
3154         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3155         GOTPtr =
3156             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3157       } else {
3158         if (!TM.isPositionIndependent())
3159           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3160         else if (picLevel == PICLevel::SmallPIC)
3161           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3162         else
3163           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3164       }
3165       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3166     }
3167     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3168   }
3169 
3170   if (Model == TLSModel::GeneralDynamic) {
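    // General-dynamic: form the address of the variable's GOT/TOC entry and
    // resolve it through __tls_get_addr; the combined pseudo nodes emitted
    // here are expanded into that call sequence by a later pass.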
3171     if (Subtarget.isUsingPCRelativeCalls()) {
3172       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3173                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3174       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3175     }
3176 
3177     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3178     SDValue GOTPtr;
3179     if (is64bit) {
3180       setUsesTOCBasePtr(DAG);
3181       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3182       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3183                                    GOTReg, TGA);
3184     } else {
3185       if (picLevel == PICLevel::SmallPIC)
3186         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3187       else
3188         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3189     }
3190     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3191                        GOTPtr, TGA, TGA);
3192   }
3193 
3194   if (Model == TLSModel::LocalDynamic) {
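    // Local-dynamic: compute the base address of the module's TLS block
    // through __tls_get_addr (the combined ADDI_TLSLD_L_ADDR node), then add
    // the variable's DTP-relative offset with ADDIS_DTPREL_HA / ADDI_DTPREL_L.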
3195     if (Subtarget.isUsingPCRelativeCalls()) {
3196       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3197                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3198       SDValue MatPCRel =
3199           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3200       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3201     }
3202 
3203     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3204     SDValue GOTPtr;
3205     if (is64bit) {
3206       setUsesTOCBasePtr(DAG);
3207       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3208       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3209                            GOTReg, TGA);
3210     } else {
3211       if (picLevel == PICLevel::SmallPIC)
3212         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3213       else
3214         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3215     }
3216     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3217                                   PtrVT, GOTPtr, TGA, TGA);
3218     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3219                                       PtrVT, TLSAddr, TGA);
3220     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3221   }
3222 
3223   llvm_unreachable("Unknown TLS model!");
3224 }
3225 
3226 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3227                                               SelectionDAG &DAG) const {
3228   EVT PtrVT = Op.getValueType();
3229   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3230   SDLoc DL(GSDN);
3231   const GlobalValue *GV = GSDN->getGlobal();
3232 
3233   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3234   // The actual address of the GlobalValue is stored in the TOC.
3235   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3236     if (Subtarget.isUsingPCRelativeCalls()) {
3237       EVT Ty = getPointerTy(DAG.getDataLayout());
3238       if (isAccessedAsGotIndirect(Op)) {
3239         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3240                                                 PPCII::MO_PCREL_FLAG |
3241                                                     PPCII::MO_GOT_FLAG);
3242         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3243         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3244                                    MachinePointerInfo());
3245         return Load;
3246       } else {
3247         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3248                                                 PPCII::MO_PCREL_FLAG);
3249         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3250       }
3251     }
3252     setUsesTOCBasePtr(DAG);
3253     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3254     return getTOCEntry(DAG, DL, GA);
3255   }
3256 
3257   unsigned MOHiFlag, MOLoFlag;
3258   bool IsPIC = isPositionIndependent();
3259   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3260 
3261   if (IsPIC && Subtarget.isSVR4ABI()) {
3262     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3263                                             GSDN->getOffset(),
3264                                             PPCII::MO_PIC_FLAG);
3265     return getTOCEntry(DAG, DL, GA);
3266   }
3267 
3268   SDValue GAHi =
3269     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3270   SDValue GALo =
3271     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3272 
3273   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3274 }
3275 
3276 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3277   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3278   SDLoc dl(Op);
3279 
3280   if (Op.getValueType() == MVT::v2i64) {
3281     // When the operands themselves are v2i64 values, we need to do something
3282     // special because VSX has no underlying comparison operations for these.
3283     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons; everything else needs to be expanded.
3286       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3287         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3288                  DAG.getSetCC(dl, MVT::v4i32,
3289                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3290                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3291                    CC));
3292       }
3293 
3294       return SDValue();
3295     }
3296 
3297     // We handle most of these in the usual way.
3298     return Op;
3299   }
3300 
3301   // If we're comparing for equality to zero, expose the fact that this is
3302   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3303   // fold the new nodes.
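  // For example, (seteq x, 0) for an i32 x becomes (srl (ctlz x), 5), i.e.
  // cntlzw followed by a shift, which yields 1 only when x is zero.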
3304   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3305     return V;
3306 
3307   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3308     // Leave comparisons against 0 and -1 alone for now, since they're usually
3309     // optimized.  FIXME: revisit this when we can custom lower all setcc
3310     // optimizations.
3311     if (C->isAllOnesValue() || C->isNullValue())
3312       return SDValue();
3313   }
3314 
3315   // If we have an integer seteq/setne, turn it into a compare against zero
3316   // by xor'ing the rhs with the lhs, which is faster than setting a
3317   // condition register, reading it back out, and masking the correct bit.  The
3318   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3319   // the result to other bit-twiddling opportunities.
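  // For example, (seteq a, b) becomes (seteq (xor a, b), 0).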
3320   EVT LHSVT = Op.getOperand(0).getValueType();
3321   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3322     EVT VT = Op.getValueType();
    SDValue Xor = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Xor, DAG.getConstant(0, dl, LHSVT), CC);
3326   }
3327   return SDValue();
3328 }
3329 
3330 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3331   SDNode *Node = Op.getNode();
3332   EVT VT = Node->getValueType(0);
3333   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3334   SDValue InChain = Node->getOperand(0);
3335   SDValue VAListPtr = Node->getOperand(1);
3336   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3337   SDLoc dl(Node);
3338 
3339   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
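  // This lowers va_arg for the 32-bit SVR4 va_list layout described in
  // LowerVASTART below: the gpr/fpr count selects between the register save
  // area and the overflow area, and the count (and overflow pointer) are then
  // advanced past the argument.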
3340 
3341   // gpr_index
3342   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3343                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3344   InChain = GprIndex.getValue(1);
3345 
3346   if (VT == MVT::i64) {
3347     // Check if GprIndex is even
3348     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3349                                  DAG.getConstant(1, dl, MVT::i32));
3350     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3351                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3352     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3353                                           DAG.getConstant(1, dl, MVT::i32));
3354     // Align GprIndex to be even if it isn't
3355     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3356                            GprIndex);
3357   }
3358 
3359   // fpr index is 1 byte after gpr
3360   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3361                                DAG.getConstant(1, dl, MVT::i32));
3362 
3363   // fpr
3364   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3365                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3366   InChain = FprIndex.getValue(1);
3367 
3368   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3369                                        DAG.getConstant(8, dl, MVT::i32));
3370 
3371   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3372                                         DAG.getConstant(4, dl, MVT::i32));
3373 
3374   // areas
3375   SDValue OverflowArea =
3376       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3377   InChain = OverflowArea.getValue(1);
3378 
3379   SDValue RegSaveArea =
3380       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3381   InChain = RegSaveArea.getValue(1);
3382 
  // select overflow_area if index >= 8
3384   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3385                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3386 
3387   // adjustment constant gpr_index * 4/8
3388   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3389                                     VT.isInteger() ? GprIndex : FprIndex,
3390                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3391                                                     MVT::i32));
3392 
3393   // OurReg = RegSaveArea + RegConstant
3394   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3395                                RegConstant);
3396 
3397   // Floating types are 32 bytes into RegSaveArea
3398   if (VT.isFloatingPoint())
3399     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3400                          DAG.getConstant(32, dl, MVT::i32));
3401 
3402   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3403   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3404                                    VT.isInteger() ? GprIndex : FprIndex,
3405                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3406                                                    MVT::i32));
3407 
3408   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3409                               VT.isInteger() ? VAListPtr : FprPtr,
3410                               MachinePointerInfo(SV), MVT::i8);
3411 
3412   // determine if we should load from reg_save_area or overflow_area
3413   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3414 
  // increase overflow_area by 4/8 if gpr/fpr index >= 8
3416   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3417                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3418                                           dl, MVT::i32));
3419 
3420   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3421                              OverflowAreaPlusN);
3422 
3423   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3424                               MachinePointerInfo(), MVT::i32);
3425 
3426   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3427 }
3428 
3429 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3430   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3431 
3432   // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3434   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3435                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3436                        false, true, false, MachinePointerInfo(),
3437                        MachinePointerInfo());
3438 }
3439 
3440 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3441                                                   SelectionDAG &DAG) const {
3442   if (Subtarget.isAIXABI())
3443     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3444 
3445   return Op.getOperand(0);
3446 }
3447 
3448 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3449                                                 SelectionDAG &DAG) const {
3450   if (Subtarget.isAIXABI())
3451     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3452 
3453   SDValue Chain = Op.getOperand(0);
3454   SDValue Trmp = Op.getOperand(1); // trampoline
3455   SDValue FPtr = Op.getOperand(2); // nested function
3456   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3457   SDLoc dl(Op);
3458 
3459   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3460   bool isPPC64 = (PtrVT == MVT::i64);
3461   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3462 
3463   TargetLowering::ArgListTy Args;
3464   TargetLowering::ArgListEntry Entry;
3465 
3466   Entry.Ty = IntPtrTy;
3467   Entry.Node = Trmp; Args.push_back(Entry);
3468 
3469   // TrampSize == (isPPC64 ? 48 : 40);
3470   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3471                                isPPC64 ? MVT::i64 : MVT::i32);
3472   Args.push_back(Entry);
3473 
3474   Entry.Node = FPtr; Args.push_back(Entry);
3475   Entry.Node = Nest; Args.push_back(Entry);
3476 
3477   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3478   TargetLowering::CallLoweringInfo CLI(DAG);
3479   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3480       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3481       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3482 
3483   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3484   return CallResult.second;
3485 }
3486 
3487 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3488   MachineFunction &MF = DAG.getMachineFunction();
3489   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3490   EVT PtrVT = getPointerTy(MF.getDataLayout());
3491 
3492   SDLoc dl(Op);
3493 
3494   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3495     // vastart just stores the address of the VarArgsFrameIndex slot into the
3496     // memory location argument.
3497     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3498     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3499     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3500                         MachinePointerInfo(SV));
3501   }
3502 
3503   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3504   // We suppose the given va_list is already allocated.
3505   //
3506   // typedef struct {
3507   //  char gpr;     /* index into the array of 8 GPRs
3508   //                 * stored in the register save area
3509   //                 * gpr=0 corresponds to r3,
3510   //                 * gpr=1 to r4, etc.
3511   //                 */
3512   //  char fpr;     /* index into the array of 8 FPRs
3513   //                 * stored in the register save area
3514   //                 * fpr=0 corresponds to f1,
3515   //                 * fpr=1 to f2, etc.
3516   //                 */
3517   //  char *overflow_arg_area;
3518   //                /* location on stack that holds
3519   //                 * the next overflow argument
3520   //                 */
3521   //  char *reg_save_area;
3522   //               /* where r3:r10 and f1:f8 (if saved)
3523   //                * are stored
3524   //                */
3525   // } va_list[1];
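  // The stores below fill the four fields in order: gpr at offset 0, fpr at
  // offset 1, overflow_arg_area at offset 4, and reg_save_area at offset 8.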
3526 
3527   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3528   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3529   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3530                                             PtrVT);
3531   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3532                                  PtrVT);
3533 
3534   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3535   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3536 
3537   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3538   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3539 
3540   uint64_t FPROffset = 1;
3541   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3542 
3543   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3544 
3545   // Store first byte : number of int regs
3546   SDValue firstStore =
3547       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3548                         MachinePointerInfo(SV), MVT::i8);
3549   uint64_t nextOffset = FPROffset;
3550   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3551                                   ConstFPROffset);
3552 
3553   // Store second byte : number of float regs
3554   SDValue secondStore =
3555       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3556                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3557   nextOffset += StackOffset;
3558   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3559 
3560   // Store second word : arguments given on stack
3561   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3562                                     MachinePointerInfo(SV, nextOffset));
3563   nextOffset += FrameOffset;
3564   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3565 
3566   // Store third word : arguments given in registers
3567   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3568                       MachinePointerInfo(SV, nextOffset));
3569 }
3570 
3571 /// FPR - The set of FP registers that should be allocated for arguments
3572 /// on Darwin and AIX.
3573 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3574                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3575                                 PPC::F11, PPC::F12, PPC::F13};
3576 
3577 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3578 /// the stack.
3579 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3580                                        unsigned PtrByteSize) {
3581   unsigned ArgSize = ArgVT.getStoreSize();
3582   if (Flags.isByVal())
3583     ArgSize = Flags.getByValSize();
3584 
3585   // Round up to multiples of the pointer size, except for array members,
3586   // which are always packed.
3587   if (!Flags.isInConsecutiveRegs())
3588     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3589 
3590   return ArgSize;
3591 }
3592 
3593 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3594 /// on the stack.
3595 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3596                                          ISD::ArgFlagsTy Flags,
3597                                          unsigned PtrByteSize) {
3598   Align Alignment(PtrByteSize);
3599 
3600   // Altivec parameters are padded to a 16 byte boundary.
3601   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3602       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3603       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3604       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3605     Alignment = Align(16);
3606 
3607   // ByVal parameters are aligned as requested.
3608   if (Flags.isByVal()) {
3609     auto BVAlign = Flags.getNonZeroByValAlign();
3610     if (BVAlign > PtrByteSize) {
3611       if (BVAlign.value() % PtrByteSize != 0)
3612         llvm_unreachable(
3613             "ByVal alignment is not a multiple of the pointer size");
3614 
3615       Alignment = BVAlign;
3616     }
3617   }
3618 
3619   // Array members are always packed to their original alignment.
3620   if (Flags.isInConsecutiveRegs()) {
3621     // If the array member was split into multiple registers, the first
3622     // needs to be aligned to the size of the full type.  (Except for
3623     // ppcf128, which is only aligned as its f64 components.)
3624     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3625       Alignment = Align(OrigVT.getStoreSize());
3626     else
3627       Alignment = Align(ArgVT.getStoreSize());
3628   }
3629 
3630   return Alignment;
3631 }
3632 
3633 /// CalculateStackSlotUsed - Return whether this argument will use its
3634 /// stack slot (instead of being passed in registers).  ArgOffset,
3635 /// AvailableFPRs, and AvailableVRs must hold the current argument
3636 /// position, and will be updated to account for this argument.
3637 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3638                                    unsigned PtrByteSize, unsigned LinkageSize,
3639                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3640                                    unsigned &AvailableFPRs,
3641                                    unsigned &AvailableVRs) {
3642   bool UseMemory = false;
3643 
3644   // Respect alignment of argument on the stack.
3645   Align Alignment =
3646       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3647   ArgOffset = alignTo(ArgOffset, Alignment);
3648   // If there's no space left in the argument save area, we must
3649   // use memory (this check also catches zero-sized arguments).
3650   if (ArgOffset >= LinkageSize + ParamAreaSize)
3651     UseMemory = true;
3652 
3653   // Allocate argument on the stack.
3654   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3655   if (Flags.isInConsecutiveRegsLast())
3656     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3657   // If we overran the argument save area, we must use memory
3658   // (this check catches arguments passed partially in memory)
3659   if (ArgOffset > LinkageSize + ParamAreaSize)
3660     UseMemory = true;
3661 
3662   // However, if the argument is actually passed in an FPR or a VR,
3663   // we don't use memory after all.
3664   if (!Flags.isByVal()) {
3665     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3666       if (AvailableFPRs > 0) {
3667         --AvailableFPRs;
3668         return false;
3669       }
3670     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3671         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3672         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3673         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3674       if (AvailableVRs > 0) {
3675         --AvailableVRs;
3676         return false;
3677       }
3678   }
3679 
3680   return UseMemory;
3681 }
3682 
/// EnsureStackAlignment - Round the stack frame size up from NumBytes to
/// ensure the minimum alignment required by the target.
3685 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3686                                      unsigned NumBytes) {
3687   return alignTo(NumBytes, Lowering->getStackAlign());
3688 }
3689 
3690 SDValue PPCTargetLowering::LowerFormalArguments(
3691     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3692     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3693     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3694   if (Subtarget.isAIXABI())
3695     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3696                                     InVals);
3697   if (Subtarget.is64BitELFABI())
3698     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3699                                        InVals);
3700   if (Subtarget.is32BitELFABI())
3701     return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3702                                        InVals);
3703 
3704   return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3705                                      InVals);
3706 }
3707 
3708 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3709     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3710     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3711     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3712 
3713   // 32-bit SVR4 ABI Stack Frame Layout:
3714   //              +-----------------------------------+
3715   //        +-->  |            Back chain             |
3716   //        |     +-----------------------------------+
3717   //        |     | Floating-point register save area |
3718   //        |     +-----------------------------------+
3719   //        |     |    General register save area     |
3720   //        |     +-----------------------------------+
3721   //        |     |          CR save word             |
3722   //        |     +-----------------------------------+
3723   //        |     |         VRSAVE save word          |
3724   //        |     +-----------------------------------+
3725   //        |     |         Alignment padding         |
3726   //        |     +-----------------------------------+
3727   //        |     |     Vector register save area     |
3728   //        |     +-----------------------------------+
3729   //        |     |       Local variable space        |
3730   //        |     +-----------------------------------+
3731   //        |     |        Parameter list area        |
3732   //        |     +-----------------------------------+
3733   //        |     |           LR save word            |
3734   //        |     +-----------------------------------+
3735   // SP-->  +---  |            Back chain             |
3736   //              +-----------------------------------+
3737   //
3738   // Specifications:
3739   //   System V Application Binary Interface PowerPC Processor Supplement
3740   //   AltiVec Technology Programming Interface Manual
3741 
3742   MachineFunction &MF = DAG.getMachineFunction();
3743   MachineFrameInfo &MFI = MF.getFrameInfo();
3744   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3745 
3746   EVT PtrVT = getPointerTy(MF.getDataLayout());
3747   // Potential tail calls could cause overwriting of argument stack slots.
3748   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3749                        (CallConv == CallingConv::Fast));
3750   const Align PtrAlign(4);
3751 
3752   // Assign locations to all of the incoming arguments.
3753   SmallVector<CCValAssign, 16> ArgLocs;
3754   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3755                  *DAG.getContext());
3756 
3757   // Reserve space for the linkage area on the stack.
3758   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3759   CCInfo.AllocateStack(LinkageSize, PtrAlign);
3760   if (useSoftFloat())
3761     CCInfo.PreAnalyzeFormalArguments(Ins);
3762 
3763   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3764   CCInfo.clearWasPPCF128();
3765 
3766   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3767     CCValAssign &VA = ArgLocs[i];
3768 
3769     // Arguments stored in registers.
3770     if (VA.isRegLoc()) {
3771       const TargetRegisterClass *RC;
3772       EVT ValVT = VA.getValVT();
3773 
3774       switch (ValVT.getSimpleVT().SimpleTy) {
3775         default:
3776           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3777         case MVT::i1:
3778         case MVT::i32:
3779           RC = &PPC::GPRCRegClass;
3780           break;
3781         case MVT::f32:
3782           if (Subtarget.hasP8Vector())
3783             RC = &PPC::VSSRCRegClass;
3784           else if (Subtarget.hasSPE())
3785             RC = &PPC::GPRCRegClass;
3786           else
3787             RC = &PPC::F4RCRegClass;
3788           break;
3789         case MVT::f64:
3790           if (Subtarget.hasVSX())
3791             RC = &PPC::VSFRCRegClass;
3792           else if (Subtarget.hasSPE())
3793             // SPE passes doubles in GPR pairs.
3794             RC = &PPC::GPRCRegClass;
3795           else
3796             RC = &PPC::F8RCRegClass;
3797           break;
3798         case MVT::v16i8:
3799         case MVT::v8i16:
3800         case MVT::v4i32:
3801           RC = &PPC::VRRCRegClass;
3802           break;
3803         case MVT::v4f32:
3804           RC = &PPC::VRRCRegClass;
3805           break;
3806         case MVT::v2f64:
3807         case MVT::v2i64:
3808           RC = &PPC::VRRCRegClass;
3809           break;
3810       }
3811 
3812       SDValue ArgValue;
3813       // Transform the arguments stored in physical registers into
3814       // virtual ones.
3815       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3816         assert(i + 1 < e && "No second half of double precision argument");
3817         unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3818         unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3819         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3820         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3821         if (!Subtarget.isLittleEndian())
3822           std::swap(ArgValueLo, ArgValueHi);
3823         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3824                                ArgValueHi);
3825       } else {
3826         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3827         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3828                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
3829         if (ValVT == MVT::i1)
3830           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3831       }
3832 
3833       InVals.push_back(ArgValue);
3834     } else {
3835       // Argument stored in memory.
3836       assert(VA.isMemLoc());
3837 
3838       // Get the extended size of the argument type on the stack.
3839       unsigned ArgSize = VA.getLocVT().getStoreSize();
3840       // Get the actual size of the argument type.
3841       unsigned ObjSize = VA.getValVT().getStoreSize();
3842       unsigned ArgOffset = VA.getLocMemOffset();
3843       // Stack objects in PPC32 are right justified.
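      // (For example, a value narrower than its 4-byte slot lives in the
      // slot's last bytes, so the load address is advanced by the difference
      // between ArgSize and ObjSize.)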
3844       ArgOffset += ArgSize - ObjSize;
3845       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3846 
3847       // Create load nodes to retrieve arguments from the stack.
3848       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3849       InVals.push_back(
3850           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3851     }
3852   }
3853 
3854   // Assign locations to all of the incoming aggregate by value arguments.
3855   // Aggregates passed by value are stored in the local variable space of the
3856   // caller's stack frame, right above the parameter list area.
3857   SmallVector<CCValAssign, 16> ByValArgLocs;
3858   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3859                       ByValArgLocs, *DAG.getContext());
3860 
3861   // Reserve stack space for the allocations in CCInfo.
3862   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3863 
3864   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3865 
3866   // Area that is at least reserved in the caller of this function.
3867   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3868   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3869 
3870   // Set the size that is at least reserved in caller of this function.  Tail
3871   // call optimized function's reserved stack space needs to be aligned so that
3872   // taking the difference between two stack areas will result in an aligned
3873   // stack.
3874   MinReservedArea =
3875       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3876   FuncInfo->setMinReservedArea(MinReservedArea);
3877 
3878   SmallVector<SDValue, 8> MemOps;
3879 
3880   // If the function takes variable number of arguments, make a frame index for
3881   // the start of the first vararg value... for expansion of llvm.va_start.
3882   if (isVarArg) {
3883     static const MCPhysReg GPArgRegs[] = {
3884       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3885       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3886     };
3887     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3888 
3889     static const MCPhysReg FPArgRegs[] = {
3890       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3891       PPC::F8
3892     };
3893     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3894 
3895     if (useSoftFloat() || hasSPE())
3896        NumFPArgRegs = 0;
3897 
3898     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3899     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3900 
3901     // Make room for NumGPArgRegs and NumFPArgRegs.
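    // (With 8 GPRs of 4 bytes and 8 FPRs of 8 bytes this comes to 96 bytes,
    // or 32 bytes when no FPR arguments need to be saved.)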
3902     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3903                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3904 
3905     FuncInfo->setVarArgsStackOffset(
3906       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3907                             CCInfo.getNextStackOffset(), true));
3908 
3909     FuncInfo->setVarArgsFrameIndex(
3910         MFI.CreateStackObject(Depth, Align(8), false));
3911     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3912 
3913     // The fixed integer arguments of a variadic function are stored to the
3914     // VarArgsFrameIndex on the stack so that they may be loaded by
3915     // dereferencing the result of va_next.
3916     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3917       // Get an existing live-in vreg, or add a new one.
3918       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3919       if (!VReg)
3920         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3921 
3922       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3923       SDValue Store =
3924           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3925       MemOps.push_back(Store);
3926       // Increment the address by four for the next argument to store
3927       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3928       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3929     }
3930 
3931     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3932     // is set.
3933     // The double arguments are stored to the VarArgsFrameIndex
3934     // on the stack.
3935     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3936       // Get an existing live-in vreg, or add a new one.
3937       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3938       if (!VReg)
3939         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3940 
3941       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3942       SDValue Store =
3943           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3944       MemOps.push_back(Store);
3945       // Increment the address by eight for the next argument to store
3946       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3947                                          PtrVT);
3948       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3949     }
3950   }
3951 
3952   if (!MemOps.empty())
3953     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3954 
3955   return Chain;
3956 }
3957 
3958 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3959 // value to MVT::i64 and then truncate to the correct register size.
3960 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3961                                              EVT ObjectVT, SelectionDAG &DAG,
3962                                              SDValue ArgVal,
3963                                              const SDLoc &dl) const {
3964   if (Flags.isSExt())
3965     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3966                          DAG.getValueType(ObjectVT));
3967   else if (Flags.isZExt())
3968     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3969                          DAG.getValueType(ObjectVT));
3970 
3971   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3972 }
3973 
3974 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3975     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3976     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3977     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3978   // TODO: add description of PPC stack frame format, or at least some docs.
3979   //
3980   bool isELFv2ABI = Subtarget.isELFv2ABI();
3981   bool isLittleEndian = Subtarget.isLittleEndian();
3982   MachineFunction &MF = DAG.getMachineFunction();
3983   MachineFrameInfo &MFI = MF.getFrameInfo();
3984   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3985 
3986   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3987          "fastcc not supported on varargs functions");
3988 
3989   EVT PtrVT = getPointerTy(MF.getDataLayout());
3990   // Potential tail calls could cause overwriting of argument stack slots.
3991   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3992                        (CallConv == CallingConv::Fast));
3993   unsigned PtrByteSize = 8;
3994   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3995 
3996   static const MCPhysReg GPR[] = {
3997     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3998     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3999   };
4000   static const MCPhysReg VR[] = {
4001     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4002     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4003   };
4004 
4005   const unsigned Num_GPR_Regs = array_lengthof(GPR);
4006   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4007   const unsigned Num_VR_Regs  = array_lengthof(VR);
4008 
4009   // Do a first pass over the arguments to determine whether the ABI
4010   // guarantees that our caller has allocated the parameter save area
4011   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4012   // in the ELFv2 ABI, it is true if this is a vararg function or if
4013   // any parameter is located in a stack slot.
4014 
4015   bool HasParameterArea = !isELFv2ABI || isVarArg;
4016   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4017   unsigned NumBytes = LinkageSize;
4018   unsigned AvailableFPRs = Num_FPR_Regs;
4019   unsigned AvailableVRs = Num_VR_Regs;
4020   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4021     if (Ins[i].Flags.isNest())
4022       continue;
4023 
4024     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4025                                PtrByteSize, LinkageSize, ParamAreaSize,
4026                                NumBytes, AvailableFPRs, AvailableVRs))
4027       HasParameterArea = true;
4028   }
4029 
4030   // Add DAG nodes to load the arguments or copy them out of registers.  On
4031   // entry to a function on PPC, the arguments start after the linkage area,
4032   // although the first ones are often in registers.
4033 
4034   unsigned ArgOffset = LinkageSize;
4035   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4036   SmallVector<SDValue, 8> MemOps;
4037   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4038   unsigned CurArgIdx = 0;
4039   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4040     SDValue ArgVal;
4041     bool needsLoad = false;
4042     EVT ObjectVT = Ins[ArgNo].VT;
4043     EVT OrigVT = Ins[ArgNo].ArgVT;
4044     unsigned ObjSize = ObjectVT.getStoreSize();
4045     unsigned ArgSize = ObjSize;
4046     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4047     if (Ins[ArgNo].isOrigArg()) {
4048       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4049       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4050     }
4051     // We re-align the argument offset for each argument, except under the
4052     // fast calling convention, where we only do so once we know the argument
4053     // will actually use a stack slot.
4054     unsigned CurArgOffset;
4055     Align Alignment;
4056     auto ComputeArgOffset = [&]() {
4057       /* Respect alignment of argument on the stack.  */
4058       Alignment =
4059           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4060       ArgOffset = alignTo(ArgOffset, Alignment);
4061       CurArgOffset = ArgOffset;
4062     };
4063 
4064     if (CallConv != CallingConv::Fast) {
4065       ComputeArgOffset();
4066 
4067       /* Compute GPR index associated with argument offset.  */
4068       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4069       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4070     }
4071 
4072     // FIXME the codegen can be much improved in some cases.
4073     // We do not have to keep everything in memory.
4074     if (Flags.isByVal()) {
4075       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4076 
4077       if (CallConv == CallingConv::Fast)
4078         ComputeArgOffset();
4079 
4080       // ObjSize is the true size; ArgSize is rounded up to whole registers.
4081       ObjSize = Flags.getByValSize();
4082       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4083       // Empty aggregate parameters do not take up registers.  Examples:
4084       //   struct { } a;
4085       //   union  { } b;
4086       //   int c[0];
4087       // etc.  However, we have to provide a place-holder in InVals, so
4088       // pretend we have an 8-byte item at the current address for that
4089       // purpose.
4090       if (!ObjSize) {
4091         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4092         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4093         InVals.push_back(FIN);
4094         continue;
4095       }
4096 
4097       // Create a stack object covering all stack doublewords occupied
4098       // by the argument.  If the argument is (fully or partially) on
4099       // the stack, or if the argument is fully in registers but the
4100       // caller has allocated the parameter save area anyway, we can refer
4101       // directly to the caller's stack frame.  Otherwise, create a
4102       // local copy in our own frame.
4103       int FI;
4104       if (HasParameterArea ||
4105           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4106         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4107       else
4108         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4109       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4110 
4111       // Handle aggregates smaller than 8 bytes.
4112       if (ObjSize < PtrByteSize) {
4113         // The value of the object is its address, which differs from the
4114         // address of the enclosing doubleword on big-endian systems.
4115         SDValue Arg = FIN;
4116         if (!isLittleEndian) {
4117           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4118           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4119         }
4120         InVals.push_back(Arg);
4121 
4122         if (GPR_idx != Num_GPR_Regs) {
4123           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4124           FuncInfo->addLiveInAttr(VReg, Flags);
4125           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4126           SDValue Store;
4127 
4128           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4129             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4130                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4131             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4132                                       MachinePointerInfo(&*FuncArg), ObjType);
4133           } else {
4134             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4135             // store the whole register as-is to the parameter save area
4136             // slot.
4137             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4138                                  MachinePointerInfo(&*FuncArg));
4139           }
4140 
4141           MemOps.push_back(Store);
4142         }
4143         // Whether we copied from a register or not, advance the offset
4144         // into the parameter save area by a full doubleword.
4145         ArgOffset += PtrByteSize;
4146         continue;
4147       }
4148 
4149       // The value of the object is its address, which is the address of
4150       // its first stack doubleword.
4151       InVals.push_back(FIN);
4152 
4153       // Store whatever pieces of the object are in registers to memory.
4154       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4155         if (GPR_idx == Num_GPR_Regs)
4156           break;
4157 
4158         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4159         FuncInfo->addLiveInAttr(VReg, Flags);
4160         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4161         SDValue Addr = FIN;
4162         if (j) {
4163           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4164           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4165         }
4166         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4167                                      MachinePointerInfo(&*FuncArg, j));
4168         MemOps.push_back(Store);
4169         ++GPR_idx;
4170       }
4171       ArgOffset += ArgSize;
4172       continue;
4173     }
4174 
4175     switch (ObjectVT.getSimpleVT().SimpleTy) {
4176     default: llvm_unreachable("Unhandled argument type!");
4177     case MVT::i1:
4178     case MVT::i32:
4179     case MVT::i64:
4180       if (Flags.isNest()) {
4181         // The 'nest' parameter, if any, is passed in R11.
4182         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4183         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4184 
4185         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4186           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4187 
4188         break;
4189       }
4190 
4191       // These can be scalar arguments or elements of an integer array type
4192       // passed directly.  Clang may use those instead of "byval" aggregate
4193       // types to avoid forcing arguments to memory unnecessarily.
4194       if (GPR_idx != Num_GPR_Regs) {
4195         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4196         FuncInfo->addLiveInAttr(VReg, Flags);
4197         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4198 
4199         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4200           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4201           // value to MVT::i64 and then truncate to the correct register size.
4202           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4203       } else {
4204         if (CallConv == CallingConv::Fast)
4205           ComputeArgOffset();
4206 
4207         needsLoad = true;
4208         ArgSize = PtrByteSize;
4209       }
4210       if (CallConv != CallingConv::Fast || needsLoad)
4211         ArgOffset += 8;
4212       break;
4213 
4214     case MVT::f32:
4215     case MVT::f64:
4216       // These can be scalar arguments or elements of a float array type
4217       // passed directly.  The latter are used to implement ELFv2 homogeneous
4218       // float aggregates.
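      // (For example, a parameter like 'struct { float a, b, c, d; }' may be
      // split into individual f32 pieces that each arrive here separately.)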
4219       if (FPR_idx != Num_FPR_Regs) {
4220         unsigned VReg;
4221 
4222         if (ObjectVT == MVT::f32)
4223           VReg = MF.addLiveIn(FPR[FPR_idx],
4224                               Subtarget.hasP8Vector()
4225                                   ? &PPC::VSSRCRegClass
4226                                   : &PPC::F4RCRegClass);
4227         else
4228           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4229                                                 ? &PPC::VSFRCRegClass
4230                                                 : &PPC::F8RCRegClass);
4231 
4232         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4233         ++FPR_idx;
4234       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4235         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4236         // once we support fp <-> gpr moves.
4237 
4238         // This can only ever happen in the presence of f32 array types,
4239         // since otherwise we never run out of FPRs before running out
4240         // of GPRs.
4241         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4242         FuncInfo->addLiveInAttr(VReg, Flags);
4243         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4244 
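        // The 4-byte float occupies one half of the 8-byte GPR; when it sits
        // in the high-order half, shift it down before truncating to 32 bits.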
4245         if (ObjectVT == MVT::f32) {
4246           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4247             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4248                                  DAG.getConstant(32, dl, MVT::i32));
4249           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4250         }
4251 
4252         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4253       } else {
4254         if (CallConv == CallingConv::Fast)
4255           ComputeArgOffset();
4256 
4257         needsLoad = true;
4258       }
4259 
4260       // When passing an array of floats, the array occupies consecutive
4261       // space in the argument area; only round up to the next doubleword
4262       // at the end of the array.  Otherwise, each float takes 8 bytes.
4263       if (CallConv != CallingConv::Fast || needsLoad) {
4264         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4265         ArgOffset += ArgSize;
4266         if (Flags.isInConsecutiveRegsLast())
4267           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4268       }
4269       break;
4270     case MVT::v4f32:
4271     case MVT::v4i32:
4272     case MVT::v8i16:
4273     case MVT::v16i8:
4274     case MVT::v2f64:
4275     case MVT::v2i64:
4276     case MVT::v1i128:
4277     case MVT::f128:
4278       // These can be scalar arguments or elements of a vector array type
4279       // passed directly.  The latter are used to implement ELFv2 homogeneous
4280       // vector aggregates.
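      // (For example, 'struct { vector int a, b; }' may arrive as two
      // separate v4i32 values.)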
4281       if (VR_idx != Num_VR_Regs) {
4282         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4283         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4284         ++VR_idx;
4285       } else {
4286         if (CallConv == CallingConv::Fast)
4287           ComputeArgOffset();
4288         needsLoad = true;
4289       }
4290       if (CallConv != CallingConv::Fast || needsLoad)
4291         ArgOffset += 16;
4292       break;
4293     }
4294 
4295     // We need to load the argument to a virtual register if we determined
4296     // above that we ran out of physical registers of the appropriate type.
4297     if (needsLoad) {
4298       if (ObjSize < ArgSize && !isLittleEndian)
4299         CurArgOffset += ArgSize - ObjSize;
4300       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4301       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4302       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4303     }
4304 
4305     InVals.push_back(ArgVal);
4306   }
4307 
4308   // Area that is at least reserved in the caller of this function.
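  // (When a parameter save area exists, it is reserved for at least the
  // eight GPR doublewords, hence the 8 * PtrByteSize below.)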
4309   unsigned MinReservedArea;
4310   if (HasParameterArea)
4311     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4312   else
4313     MinReservedArea = LinkageSize;
4314 
4315   // Set the size that is at least reserved in caller of this function.  Tail
4316   // call optimized functions' reserved stack space needs to be aligned so that
4317   // taking the difference between two stack areas will result in an aligned
4318   // stack.
4319   MinReservedArea =
4320       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4321   FuncInfo->setMinReservedArea(MinReservedArea);
4322 
4323   // If the function takes variable number of arguments, make a frame index for
4324   // the start of the first vararg value... for expansion of llvm.va_start.
4325   // The ELFv2 ABI spec states:
4326   // C programs that are intended to be *portable* across different compilers
4327   // and architectures must use the header file <stdarg.h> to deal with
4328   // variable argument lists.
4329   if (isVarArg && MFI.hasVAStart()) {
4330     int Depth = ArgOffset;
4331 
4332     FuncInfo->setVarArgsFrameIndex(
4333       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4334     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4335 
4336     // If this function is vararg, store any remaining integer argument regs
4337     // to their spots on the stack so that they may be loaded by dereferencing
4338     // the result of va_next.
4339     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4340          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4341       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4342       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4343       SDValue Store =
4344           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4345       MemOps.push_back(Store);
4346       // Increment the address by four for the next argument to store
4347       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4348       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4349     }
4350   }
4351 
4352   if (!MemOps.empty())
4353     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4354 
4355   return Chain;
4356 }
4357 
4358 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4359     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4360     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4361     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4362   // TODO: add description of PPC stack frame format, or at least some docs.
4363   //
4364   MachineFunction &MF = DAG.getMachineFunction();
4365   MachineFrameInfo &MFI = MF.getFrameInfo();
4366   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4367 
4368   EVT PtrVT = getPointerTy(MF.getDataLayout());
4369   bool isPPC64 = PtrVT == MVT::i64;
4370   // Potential tail calls could cause overwriting of argument stack slots.
4371   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4372                        (CallConv == CallingConv::Fast));
4373   unsigned PtrByteSize = isPPC64 ? 8 : 4;
4374   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4375   unsigned ArgOffset = LinkageSize;
4376   // Area that is at least reserved in caller of this function.
4377   unsigned MinReservedArea = ArgOffset;
4378 
4379   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
4380     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4381     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4382   };
4383   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
4384     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4385     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4386   };
4387   static const MCPhysReg VR[] = {
4388     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4389     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4390   };
4391 
4392   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4393   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4394   const unsigned Num_VR_Regs  = array_lengthof(VR);
4395 
4396   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4397 
4398   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4399 
4400   // In 32-bit non-varargs functions, the stack space for vectors is after the
4401   // stack space for non-vectors.  We do not use this space unless we have
4402   // too many vectors to fit in registers, something that only occurs in
4403   // constructed examples, but we still have to walk the argument list to
4404   // figure that out.  For that pathological case, compute VecArgOffset as
4405   // the start of the vector parameter area.  Computing VecArgOffset is the
4406   // entire point of the following loop.
4407   unsigned VecArgOffset = ArgOffset;
4408   if (!isVarArg && !isPPC64) {
4409     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4410          ++ArgNo) {
4411       EVT ObjectVT = Ins[ArgNo].VT;
4412       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4413 
4414       if (Flags.isByVal()) {
4415         // ObjSize is the true size; ArgSize is rounded up to whole regs.
4416         unsigned ObjSize = Flags.getByValSize();
4417         unsigned ArgSize =
4418                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4419         VecArgOffset += ArgSize;
4420         continue;
4421       }
4422 
4423       switch(ObjectVT.getSimpleVT().SimpleTy) {
4424       default: llvm_unreachable("Unhandled argument type!");
4425       case MVT::i1:
4426       case MVT::i32:
4427       case MVT::f32:
4428         VecArgOffset += 4;
4429         break;
4430       case MVT::i64:  // PPC64
4431       case MVT::f64:
4432         // FIXME: We are guaranteed to be !isPPC64 at this point.
4433         // Does MVT::i64 apply?
4434         VecArgOffset += 8;
4435         break;
4436       case MVT::v4f32:
4437       case MVT::v4i32:
4438       case MVT::v8i16:
4439       case MVT::v16i8:
4440         // Nothing to do, we're only looking at non-vector args here.
4441         break;
4442       }
4443     }
4444   }
4445   // We've found where the vector parameter area in memory is.  Skip the
4446   // first 12 vector parameters (V2-V13); these don't use that memory.
4447   VecArgOffset = ((VecArgOffset+15)/16)*16;
4448   VecArgOffset += 12*16;
4449 
4450   // Add DAG nodes to load the arguments or copy them out of registers.  On
4451   // entry to a function on PPC, the arguments start after the linkage area,
4452   // although the first ones are often in registers.
4453 
4454   SmallVector<SDValue, 8> MemOps;
4455   unsigned nAltivecParamsAtEnd = 0;
4456   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4457   unsigned CurArgIdx = 0;
4458   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4459     SDValue ArgVal;
4460     bool needsLoad = false;
4461     EVT ObjectVT = Ins[ArgNo].VT;
4462     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4463     unsigned ArgSize = ObjSize;
4464     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4465     if (Ins[ArgNo].isOrigArg()) {
4466       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4467       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4468     }
4469     unsigned CurArgOffset = ArgOffset;
4470 
4471     // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
4472     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4473         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4474       if (isVarArg || isPPC64) {
4475         MinReservedArea = ((MinReservedArea+15)/16)*16;
4476         MinReservedArea += CalculateStackSlotSize(ObjectVT,
4477                                                   Flags,
4478                                                   PtrByteSize);
4479       } else  nAltivecParamsAtEnd++;
4480     } else
4481       // Calculate min reserved area.
4482       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4483                                                 Flags,
4484                                                 PtrByteSize);
4485 
4486     // FIXME the codegen can be much improved in some cases.
4487     // We do not have to keep everything in memory.
4488     if (Flags.isByVal()) {
4489       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4490 
4491       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4492       ObjSize = Flags.getByValSize();
4493       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4494       // Objects of size 1 and 2 are right justified, everything else is
4495       // left justified.  This means the memory address is adjusted forwards.
4496       if (ObjSize==1 || ObjSize==2) {
4497         CurArgOffset = CurArgOffset + (4 - ObjSize);
4498       }
4499       // The value of the object is its address.
4500       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4501       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4502       InVals.push_back(FIN);
4503       if (ObjSize==1 || ObjSize==2) {
4504         if (GPR_idx != Num_GPR_Regs) {
4505           unsigned VReg;
4506           if (isPPC64)
4507             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4508           else
4509             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4510           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4511           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4512           SDValue Store =
4513               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4514                                 MachinePointerInfo(&*FuncArg), ObjType);
4515           MemOps.push_back(Store);
4516           ++GPR_idx;
4517         }
4518 
4519         ArgOffset += PtrByteSize;
4520 
4521         continue;
4522       }
4523       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4524         // Store whatever pieces of the object are in registers
4525         // to memory.  ArgOffset will be the address of the beginning
4526         // of the object.
4527         if (GPR_idx != Num_GPR_Regs) {
4528           unsigned VReg;
4529           if (isPPC64)
4530             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4531           else
4532             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4533           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4534           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4535           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4536           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4537                                        MachinePointerInfo(&*FuncArg, j));
4538           MemOps.push_back(Store);
4539           ++GPR_idx;
4540           ArgOffset += PtrByteSize;
4541         } else {
4542           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4543           break;
4544         }
4545       }
4546       continue;
4547     }
4548 
4549     switch (ObjectVT.getSimpleVT().SimpleTy) {
4550     default: llvm_unreachable("Unhandled argument type!");
4551     case MVT::i1:
4552     case MVT::i32:
4553       if (!isPPC64) {
4554         if (GPR_idx != Num_GPR_Regs) {
4555           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4556           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4557 
4558           if (ObjectVT == MVT::i1)
4559             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4560 
4561           ++GPR_idx;
4562         } else {
4563           needsLoad = true;
4564           ArgSize = PtrByteSize;
4565         }
4566         // All int arguments reserve stack space in the Darwin ABI.
4567         ArgOffset += PtrByteSize;
4568         break;
4569       }
4570       LLVM_FALLTHROUGH;
4571     case MVT::i64:  // PPC64
4572       if (GPR_idx != Num_GPR_Regs) {
4573         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4574         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4575 
4576         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4577           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4578           // value to MVT::i64 and then truncate to the correct register size.
4579           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4580 
4581         ++GPR_idx;
4582       } else {
4583         needsLoad = true;
4584         ArgSize = PtrByteSize;
4585       }
4586       // All int arguments reserve stack space in the Darwin ABI.
4587       ArgOffset += 8;
4588       break;
4589 
4590     case MVT::f32:
4591     case MVT::f64:
4592       // Every 4 bytes of argument space consumes one of the GPRs available for
4593       // argument passing.
4594       if (GPR_idx != Num_GPR_Regs) {
4595         ++GPR_idx;
4596         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4597           ++GPR_idx;
4598       }
4599       if (FPR_idx != Num_FPR_Regs) {
4600         unsigned VReg;
4601 
4602         if (ObjectVT == MVT::f32)
4603           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4604         else
4605           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4606 
4607         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4608         ++FPR_idx;
4609       } else {
4610         needsLoad = true;
4611       }
4612 
4613       // All FP arguments reserve stack space in the Darwin ABI.
4614       ArgOffset += isPPC64 ? 8 : ObjSize;
4615       break;
4616     case MVT::v4f32:
4617     case MVT::v4i32:
4618     case MVT::v8i16:
4619     case MVT::v16i8:
4620       // Note that vector arguments in registers don't reserve stack space,
4621       // except in varargs functions.
4622       if (VR_idx != Num_VR_Regs) {
4623         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4624         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4625         if (isVarArg) {
4626           while ((ArgOffset % 16) != 0) {
4627             ArgOffset += PtrByteSize;
4628             if (GPR_idx != Num_GPR_Regs)
4629               GPR_idx++;
4630           }
4631           ArgOffset += 16;
4632           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4633         }
4634         ++VR_idx;
4635       } else {
4636         if (!isVarArg && !isPPC64) {
4637           // Vectors go after all the nonvectors.
4638           CurArgOffset = VecArgOffset;
4639           VecArgOffset += 16;
4640         } else {
4641           // Vectors are aligned.
4642           ArgOffset = ((ArgOffset+15)/16)*16;
4643           CurArgOffset = ArgOffset;
4644           ArgOffset += 16;
4645         }
4646         needsLoad = true;
4647       }
4648       break;
4649     }
4650 
4651     // We need to load the argument to a virtual register if we determined above
4652     // that we ran out of physical registers of the appropriate type.
4653     if (needsLoad) {
4654       int FI = MFI.CreateFixedObject(ObjSize,
4655                                      CurArgOffset + (ArgSize - ObjSize),
4656                                      isImmutable);
4657       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4658       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4659     }
4660 
4661     InVals.push_back(ArgVal);
4662   }
4663 
4664   // Allow for Altivec parameters at the end, if needed.
4665   if (nAltivecParamsAtEnd) {
4666     MinReservedArea = ((MinReservedArea+15)/16)*16;
4667     MinReservedArea += 16*nAltivecParamsAtEnd;
4668   }
4669 
4670   // Area that is at least reserved in the caller of this function.
4671   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4672 
4673   // Set the size that is at least reserved in caller of this function.  Tail
4674   // call optimized functions' reserved stack space needs to be aligned so that
4675   // taking the difference between two stack areas will result in an aligned
4676   // stack.
4677   MinReservedArea =
4678       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4679   FuncInfo->setMinReservedArea(MinReservedArea);
4680 
4681   // If the function takes variable number of arguments, make a frame index for
4682   // the start of the first vararg value... for expansion of llvm.va_start.
4683   if (isVarArg) {
4684     int Depth = ArgOffset;
4685 
4686     FuncInfo->setVarArgsFrameIndex(
4687       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4688                             Depth, true));
4689     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4690 
4691     // If this function is vararg, store any remaining integer argument regs
4692     // to their spots on the stack so that they may be loaded by dereferencing
4693     // the result of va_next.
4694     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4695       unsigned VReg;
4696 
4697       if (isPPC64)
4698         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4699       else
4700         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4701 
4702       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4703       SDValue Store =
4704           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4705       MemOps.push_back(Store);
4706       // Increment the address by four for the next argument to store
4707       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4708       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4709     }
4710   }
4711 
4712   if (!MemOps.empty())
4713     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4714 
4715   return Chain;
4716 }
4717 
4718 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4719 /// adjusted to accommodate the arguments for the tailcall.
4720 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4721                                    unsigned ParamSize) {
4722 
4723   if (!isTailCall) return 0;
4724 
4725   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4726   unsigned CallerMinReservedArea = FI->getMinReservedArea();
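  // A negative SPDiff means the callee needs more argument space than the
  // caller has already reserved for it.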
4727   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4728   // Remember only if the new adjustment is bigger.
4729   if (SPDiff < FI->getTailCallSPDelta())
4730     FI->setTailCallSPDelta(SPDiff);
4731 
4732   return SPDiff;
4733 }
4734 
4735 static bool isFunctionGlobalAddress(SDValue Callee);
4736 
4737 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4738                               const TargetMachine &TM) {
4739   // It does not make sense to call callsShareTOCBase() with a caller that
4740   // is PC Relative since PC Relative callers do not have a TOC.
4741 #ifndef NDEBUG
4742   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4743   assert(!STICaller->isUsingPCRelativeCalls() &&
4744          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4745 #endif
4746 
4747   // Callee is either a GlobalAddress or an ExternalSymbol.  For an
4748   // ExternalSymbol we don't have enough information to determine whether the
4749   // caller and callee share the same TOC base, so we have to pessimistically
4750   // assume they don't, for correctness.
4751   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4752   if (!G)
4753     return false;
4754 
4755   const GlobalValue *GV = G->getGlobal();
4756 
4757   // If the callee is preemptible, then the static linker will use a PLT stub
4758   // which saves the TOC to the stack and requires a nop after the call
4759   // instruction that can be converted into a TOC restore.
4760   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4761     return false;
4762 
4763   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4764   // We may need a TOC restore in the situation where the caller requires a
4765   // valid TOC but the callee is PC Relative and does not.
4766   const Function *F = dyn_cast<Function>(GV);
4767   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4768 
4769   // If we have an Alias we can try to get the function from there.
4770   if (Alias) {
4771     const GlobalObject *GlobalObj = Alias->getBaseObject();
4772     F = dyn_cast<Function>(GlobalObj);
4773   }
4774 
4775   // If we still have no valid function pointer we do not have enough
4776   // information to determine if the callee uses PC Relative calls so we must
4777   // assume that it does.
4778   if (!F)
4779     return false;
4780 
4781   // If the callee uses PC Relative we cannot guarantee that the callee won't
4782   // clobber the TOC of the caller and so we must assume that the two
4783   // functions do not share a TOC base.
4784   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4785   if (STICallee->isUsingPCRelativeCalls())
4786     return false;
4787 
4788   // The medium and large code models are expected to provide a sufficiently
4789   // large TOC to provide all data addressing needs of a module with a
4790   // single TOC.
4791   if (CodeModel::Medium == TM.getCodeModel() ||
4792       CodeModel::Large == TM.getCodeModel())
4793     return true;
4794 
4795   // Otherwise we need to ensure callee and caller are in the same section,
4796   // since the linker may allocate multiple TOCs, and we don't know which
4797   // sections will belong to the same TOC base.
4798   if (!GV->isStrongDefinitionForLinker())
4799     return false;
4800 
4801   // Any explicitly-specified sections and section prefixes must also match.
4802   // Also, if we're using -ffunction-sections, then each function is always in
4803   // a different section (the same is true for COMDAT functions).
4804   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4805       GV->getSection() != Caller->getSection())
4806     return false;
4807   if (const auto *F = dyn_cast<Function>(GV)) {
4808     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4809       return false;
4810   }
4811 
4812   return true;
4813 }
4814 
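/// Returns true if any of the outgoing parameters in \p Outs would have to
/// be passed in a stack slot rather than entirely in registers under the
/// 64-bit ELF ABI.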
4815 static bool
4816 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4817                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4818   assert(Subtarget.is64BitELFABI());
4819 
4820   const unsigned PtrByteSize = 8;
4821   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4822 
4823   static const MCPhysReg GPR[] = {
4824     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4825     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4826   };
4827   static const MCPhysReg VR[] = {
4828     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4829     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4830   };
4831 
4832   const unsigned NumGPRs = array_lengthof(GPR);
4833   const unsigned NumFPRs = 13;
4834   const unsigned NumVRs = array_lengthof(VR);
4835   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4836 
4837   unsigned NumBytes = LinkageSize;
4838   unsigned AvailableFPRs = NumFPRs;
4839   unsigned AvailableVRs = NumVRs;
4840 
4841   for (const ISD::OutputArg& Param : Outs) {
4842     if (Param.Flags.isNest()) continue;
4843 
4844     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4845                                LinkageSize, ParamAreaSize, NumBytes,
4846                                AvailableFPRs, AvailableVRs))
4847       return true;
4848   }
4849   return false;
4850 }
4851 
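/// Returns true if the call site simply forwards the caller's own incoming
/// arguments (or undefs of matching type) in the same order.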
4852 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4853   if (CB.arg_size() != CallerFn->arg_size())
4854     return false;
4855 
4856   auto CalleeArgIter = CB.arg_begin();
4857   auto CalleeArgEnd = CB.arg_end();
4858   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4859 
4860   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4861     const Value* CalleeArg = *CalleeArgIter;
4862     const Value* CallerArg = &(*CallerArgIter);
4863     if (CalleeArg == CallerArg)
4864       continue;
4865 
4866     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4867     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4868     //      }
4869     // 1st argument of callee is undef and has the same type as caller.
4870     if (CalleeArg->getType() == CallerArg->getType() &&
4871         isa<UndefValue>(CalleeArg))
4872       continue;
4873 
4874     return false;
4875   }
4876 
4877   return true;
4878 }
4879 
4880 // Returns true if TCO is possible between the caller's and callee's
4881 // calling conventions.
4882 static bool
4883 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4884                                     CallingConv::ID CalleeCC) {
4885   // Tail calls are possible with fastcc and ccc.
4886   auto isTailCallableCC = [](CallingConv::ID CC) {
4887     return CC == CallingConv::C || CC == CallingConv::Fast;
4888   };
4889   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4890     return false;
4891 
4892   // We can safely tail call both fastcc and ccc callees from a c calling
4893   // convention caller. If the caller is fastcc, we may have less stack space
4894   // than a non-fastcc caller with the same signature so disable tail-calls in
4895   // that case.
4896   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4897 }
4898 
4899 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4900     SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4901     const SmallVectorImpl<ISD::OutputArg> &Outs,
4902     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4903   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4904 
4905   if (DisableSCO && !TailCallOpt) return false;
4906 
4907   // Variadic argument functions are not supported.
4908   if (isVarArg) return false;
4909 
4910   auto &Caller = DAG.getMachineFunction().getFunction();
4911   // Check that the calling conventions are compatible for tco.
4912   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4913     return false;
4914 
4915   // A caller that contains any byval parameter is not supported.
4916   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4917     return false;
4918 
4919   // A callee that contains any byval parameter is not supported either.
4920   // Note: This is a quick workaround, because in some cases, e.g. when the
4921   // caller's stack size > the callee's stack size, we are still able to apply
4922   // sibling call optimization.  For example, gcc is able to do SCO for caller1
4923   // in the following example, but not for caller2.
4924   //   struct test {
4925   //     long int a;
4926   //     char ary[56];
4927   //   } gTest;
4928   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4929   //     b->a = v.a;
4930   //     return 0;
4931   //   }
4932   //   void caller1(struct test a, struct test c, struct test *b) {
4933   //     callee(gTest, b); }
4934   //   void caller2(struct test *b) { callee(gTest, b); }
4935   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4936     return false;
4937 
4938   // If callee and caller use different calling conventions, we cannot pass
4939   // parameters on stack since offsets for the parameter area may be different.
4940   if (Caller.getCallingConv() != CalleeCC &&
4941       needStackSlotPassParameters(Subtarget, Outs))
4942     return false;
4943 
4944   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4945   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4946   // callee potentially have different TOC bases then we cannot tail call since
4947   // we need to restore the TOC pointer after the call.
4948   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4949   // We cannot guarantee this for indirect calls or calls to external functions.
4950   // When PC-Relative addressing is used, the concept of the TOC is no longer
4951   // applicable so this check is not required.
4952   // Check first for indirect calls.
4953   if (!Subtarget.isUsingPCRelativeCalls() &&
4954       !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4955     return false;
4956 
4957   // Check if we share the TOC base.
4958   if (!Subtarget.isUsingPCRelativeCalls() &&
4959       !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4960     return false;
4961 
4962   // TCO allows altering callee ABI, so we don't have to check further.
4963   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4964     return true;
4965 
4966   if (DisableSCO) return false;
4967 
4968   // If the callee uses the same argument list as the caller, then we can
4969   // apply SCO in this case.  If not, we need to check whether the callee
4970   // needs stack space for passing arguments.
4971   // PC Relative tail calls may not have a CallBase.
4972   // If there is no CallBase we cannot verify if we have the same argument
4973   // list so assume that we don't have the same argument list.
4974   if (CB && !hasSameArgumentList(&Caller, *CB) &&
4975       needStackSlotPassParameters(Subtarget, Outs))
4976     return false;
4977   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4978     return false;
4979 
4980   return true;
4981 }
4982 
4983 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4984 /// for tail call optimization. Targets which want to do tail call
4985 /// optimization should implement this function.
4986 bool
4987 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4988                                                      CallingConv::ID CalleeCC,
4989                                                      bool isVarArg,
4990                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4991                                                      SelectionDAG& DAG) const {
4992   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4993     return false;
4994 
4995   // Variable argument functions are not supported.
4996   if (isVarArg)
4997     return false;
4998 
4999   MachineFunction &MF = DAG.getMachineFunction();
5000   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5001   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing byval parameters are not supported.
5003     for (unsigned i = 0; i != Ins.size(); i++) {
5004        ISD::ArgFlagsTy Flags = Ins[i].Flags;
5005        if (Flags.isByVal()) return false;
5006     }
5007 
5008     // Non-PIC/GOT tail calls are supported.
5009     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5010       return true;
5011 
    // At the moment we can only do local tail calls (in the same module,
    // hidden or protected) if we are generating PIC.
5014     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
5015       return G->getGlobal()->hasHiddenVisibility()
5016           || G->getGlobal()->hasProtectedVisibility();
5017   }
5018 
5019   return false;
5020 }
5021 
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
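/// For illustration only (no extra logic here): an address such as 0x1000 is
/// accepted because its low 2 bits are zero and it fits in a sign-extended
/// 26-bit field, yielding the immediate 0x400 (0x1000 >> 2); an address such
/// as 0x1001, or anything outside [-0x2000000, 0x1FFFFFC], is rejected.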
5024 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5025   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5026   if (!C) return nullptr;
5027 
5028   int Addr = C->getZExtValue();
5029   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
5030       SignExtend32<26>(Addr) != Addr)
5031     return nullptr;  // Top 6 bits have to be sext of immediate.
5032 
5033   return DAG
5034       .getConstant(
5035           (int)C->getZExtValue() >> 2, SDLoc(Op),
5036           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5037       .getNode();
5038 }
5039 
5040 namespace {
5041 
5042 struct TailCallArgumentInfo {
5043   SDValue Arg;
5044   SDValue FrameIdxOp;
5045   int FrameIdx = 0;
5046 
5047   TailCallArgumentInfo() = default;
5048 };
5049 
5050 } // end anonymous namespace
5051 
5052 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5053 static void StoreTailCallArgumentsToStackSlot(
5054     SelectionDAG &DAG, SDValue Chain,
5055     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5056     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5057   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5058     SDValue Arg = TailCallArgs[i].Arg;
5059     SDValue FIN = TailCallArgs[i].FrameIdxOp;
5060     int FI = TailCallArgs[i].FrameIdx;
    // Store relative to the frame pointer.
5062     MemOpChains.push_back(DAG.getStore(
5063         Chain, dl, Arg, FIN,
5064         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5065   }
5066 }
5067 
5068 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5069 /// the appropriate stack slot for the tail call optimized function call.
5070 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5071                                              SDValue OldRetAddr, SDValue OldFP,
5072                                              int SPDiff, const SDLoc &dl) {
5073   if (SPDiff) {
5074     // Calculate the new stack slot for the return address.
5075     MachineFunction &MF = DAG.getMachineFunction();
5076     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5077     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5078     bool isPPC64 = Subtarget.isPPC64();
5079     int SlotSize = isPPC64 ? 8 : 4;
5080     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5081     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5082                                                          NewRetAddrLoc, true);
5083     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5084     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5085     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5086                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5087   }
5088   return Chain;
5089 }
5090 
/// CalculateTailCallArgDest - Remember the argument for later processing.
/// Calculate the position of the argument.
5093 static void
5094 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5095                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5096                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5097   int Offset = ArgOffset + SPDiff;
5098   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5099   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5100   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5101   SDValue FIN = DAG.getFrameIndex(FI, VT);
5102   TailCallArgumentInfo Info;
5103   Info.Arg = Arg;
5104   Info.FrameIdxOp = FIN;
5105   Info.FrameIdx = FI;
5106   TailCallArguments.push_back(Info);
5107 }
5108 
/// EmitTailCallLoadFPAndRetAddr - Emit the load from the return address stack
/// slot. Returns the chain as result and the loaded return address in
/// LROpOut. Used when tail calling.
5112 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5113     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5114     SDValue &FPOpOut, const SDLoc &dl) const {
5115   if (SPDiff) {
5116     // Load the LR and FP stack slot for later adjusting.
5117     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5118     LROpOut = getReturnAddrFrameIndex(DAG);
5119     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5120     Chain = SDValue(LROpOut.getNode(), 1);
5121   }
5122   return Chain;
5123 }
5124 
/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to address "Dst" of size "Size".  Alignment information
/// is specified by the specific parameter attribute. The copy will be passed
/// as a byval function parameter.
5129 /// Sometimes what we are copying is the end of a larger object, the part that
5130 /// does not fit in registers.
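/// For instance (illustrative only): when a byval aggregate only partially
/// fits in the remaining argument GPRs, the in-register portion is passed
/// separately and this memcpy materializes the trailing bytes in the
/// parameter save area.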
5131 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5132                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5133                                          SelectionDAG &DAG, const SDLoc &dl) {
5134   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5135   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5136                        Flags.getNonZeroByValAlign(), false, false, false,
5137                        MachinePointerInfo(), MachinePointerInfo());
5138 }
5139 
5140 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5141 /// tail calls.
5142 static void LowerMemOpCallTo(
5143     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5144     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5145     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5146     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5147   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5148   if (!isTailCall) {
5149     if (isVector) {
5150       SDValue StackPtr;
5151       if (isPPC64)
5152         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5153       else
5154         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5155       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5156                            DAG.getConstant(ArgOffset, dl, PtrVT));
5157     }
5158     MemOpChains.push_back(
5159         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5160     // Calculate and remember argument location.
5161   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5162                                   TailCallArguments);
5163 }
5164 
5165 static void
5166 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5167                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5168                 SDValue FPOp,
5169                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5170   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5171   // might overwrite each other in case of tail call optimization.
5172   SmallVector<SDValue, 8> MemOpChains2;
5173   // Do not flag preceding copytoreg stuff together with the following stuff.
5174   InFlag = SDValue();
5175   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5176                                     MemOpChains2, dl);
5177   if (!MemOpChains2.empty())
5178     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5179 
5180   // Store the return address to the appropriate stack slot.
5181   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5182 
5183   // Emit callseq_end just before tailcall node.
5184   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5185                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5186   InFlag = Chain.getValue(1);
5187 }
5188 
// Is this global address that of a function that can be called by name (as
// opposed to something that must hold a descriptor for an indirect call)?
5191 static bool isFunctionGlobalAddress(SDValue Callee) {
5192   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5193     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5194         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5195       return false;
5196 
5197     return G->getGlobal()->getValueType()->isFunctionTy();
5198   }
5199 
5200   return false;
5201 }
5202 
5203 SDValue PPCTargetLowering::LowerCallResult(
5204     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5205     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5206     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5207   SmallVector<CCValAssign, 16> RVLocs;
5208   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5209                     *DAG.getContext());
5210 
5211   CCRetInfo.AnalyzeCallResult(
5212       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5213                ? RetCC_PPC_Cold
5214                : RetCC_PPC);
5215 
5216   // Copy all of the result registers out of their specified physreg.
5217   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5218     CCValAssign &VA = RVLocs[i];
5219     assert(VA.isRegLoc() && "Can only return in registers!");
5220 
5221     SDValue Val;
5222 
5223     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5224       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5225                                       InFlag);
5226       Chain = Lo.getValue(1);
5227       InFlag = Lo.getValue(2);
5228       VA = RVLocs[++i]; // skip ahead to next loc
5229       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5230                                       InFlag);
5231       Chain = Hi.getValue(1);
5232       InFlag = Hi.getValue(2);
5233       if (!Subtarget.isLittleEndian())
5234         std::swap (Lo, Hi);
5235       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5236     } else {
5237       Val = DAG.getCopyFromReg(Chain, dl,
5238                                VA.getLocReg(), VA.getLocVT(), InFlag);
5239       Chain = Val.getValue(1);
5240       InFlag = Val.getValue(2);
5241     }
5242 
5243     switch (VA.getLocInfo()) {
5244     default: llvm_unreachable("Unknown loc info!");
5245     case CCValAssign::Full: break;
5246     case CCValAssign::AExt:
5247       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5248       break;
5249     case CCValAssign::ZExt:
5250       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5251                         DAG.getValueType(VA.getValVT()));
5252       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5253       break;
5254     case CCValAssign::SExt:
5255       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5256                         DAG.getValueType(VA.getValVT()));
5257       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5258       break;
5259     }
5260 
5261     InVals.push_back(Val);
5262   }
5263 
5264   return Chain;
5265 }
5266 
5267 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5268                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5269   // PatchPoint calls are not indirect.
5270   if (isPatchPoint)
5271     return false;
5272 
5273   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5274     return false;
5275 
  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
  // because the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the
  // function pointer immediate points to the global entry point, while the
  // BLA would need to jump to the local entry point (see rL211174).
5281   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5282       isBLACompatibleAddress(Callee, DAG))
5283     return false;
5284 
5285   return true;
5286 }
5287 
5288 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5289 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5290   return Subtarget.isAIXABI() ||
5291          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5292 }
5293 
5294 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5295                               const Function &Caller,
5296                               const SDValue &Callee,
5297                               const PPCSubtarget &Subtarget,
5298                               const TargetMachine &TM) {
5299   if (CFlags.IsTailCall)
5300     return PPCISD::TC_RETURN;
5301 
5302   // This is a call through a function pointer.
5303   if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will
    // be inserted into the DAG as part of call lowering. The restore of the
    // TOC pointer is modeled by using a pseudo instruction for the call
    // opcode that represents the 2 instruction sequence of an indirect branch
    // and link, immediately followed by a load of the TOC pointer from the
    // stack save slot into gpr2. For the 64-bit ELFv2 ABI with PCRel, do not
    // restore the TOC as it is not saved or used.
5312     return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5313                                                : PPCISD::BCTRL;
5314   }
5315 
5316   if (Subtarget.isUsingPCRelativeCalls()) {
5317     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5318     return PPCISD::CALL_NOTOC;
5319   }
5320 
  // The ABIs that maintain a TOC pointer across calls need to have a nop
  // immediately following the call instruction if the caller and callee may
  // have different TOC bases. At link time, if the linker determines the
  // calls may not share a TOC base, the call is redirected to a trampoline
  // inserted by the linker. The trampoline will (among other things) save the
  // caller's TOC pointer at an ABI-designated offset in the linkage area and
  // the linker will rewrite the nop to be a load of the TOC pointer from the
  // linkage area into gpr2.
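  // For illustration only, on 64-bit ELFv2 the emitted sequence is roughly:
  //   bl callee
  //   nop              // may be rewritten by the linker to: ld 2, 24(1)
  // where 24(1) is the ELFv2 TOC save slot in the caller's frame.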
5329   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5330     return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5331                                                   : PPCISD::CALL_NOP;
5332 
5333   return PPCISD::CALL;
5334 }
5335 
5336 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5337                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5338   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5339     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5340       return SDValue(Dest, 0);
5341 
5342   // Returns true if the callee is local, and false otherwise.
5343   auto isLocalCallee = [&]() {
5344     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5345     const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5346     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5347 
5348     return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5349            !dyn_cast_or_null<GlobalIFunc>(GV);
5350   };
5351 
5352   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5353   // a static relocation model causes some versions of GNU LD (2.17.50, at
5354   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5355   // built with secure-PLT.
5356   bool UsePlt =
5357       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5358       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5359 
5360   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5361     const TargetMachine &TM = Subtarget.getTargetMachine();
5362     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5363     MCSymbolXCOFF *S =
5364         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5365 
5366     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5367     return DAG.getMCSymbol(S, PtrVT);
5368   };
5369 
5370   if (isFunctionGlobalAddress(Callee)) {
5371     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5372 
5373     if (Subtarget.isAIXABI()) {
5374       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5375       return getAIXFuncEntryPointSymbolSDNode(GV);
5376     }
5377     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5378                                       UsePlt ? PPCII::MO_PLT : 0);
5379   }
5380 
5381   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5382     const char *SymName = S->getSymbol();
5383     if (Subtarget.isAIXABI()) {
5384       // If there exists a user-declared function whose name is the same as the
5385       // ExternalSymbol's, then we pick up the user-declared version.
5386       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5387       if (const Function *F =
5388               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5389         return getAIXFuncEntryPointSymbolSDNode(F);
5390 
5391       // On AIX, direct function calls reference the symbol for the function's
5392       // entry point, which is named by prepending a "." before the function's
5393       // C-linkage name. A Qualname is returned here because an external
5394       // function entry point is a csect with XTY_ER property.
5395       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5396         auto &Context = DAG.getMachineFunction().getMMI().getContext();
5397         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5398             (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5399             SectionKind::getMetadata());
5400         return Sec->getQualNameSymbol();
5401       };
5402 
5403       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5404     }
5405     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5406                                        UsePlt ? PPCII::MO_PLT : 0);
5407   }
5408 
5409   // No transformation needed.
5410   assert(Callee.getNode() && "What no callee?");
5411   return Callee;
5412 }
5413 
5414 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5415   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5416          "Expected a CALLSEQ_STARTSDNode.");
5417 
  // The last value is the chain, except when the node has glue. If the node
  // has glue, then the last value is the glue, and the chain is the
  // second-to-last value.
5421   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5422   if (LastValue.getValueType() != MVT::Glue)
5423     return LastValue;
5424 
5425   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5426 }
5427 
// Creates the node that moves a function's address into the count register
5429 // to prepare for an indirect call instruction.
5430 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5431                                 SDValue &Glue, SDValue &Chain,
5432                                 const SDLoc &dl) {
5433   SDValue MTCTROps[] = {Chain, Callee, Glue};
5434   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5435   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5436                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5437   // The glue is the second value produced.
5438   Glue = Chain.getValue(1);
5439 }
5440 
5441 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5442                                           SDValue &Glue, SDValue &Chain,
5443                                           SDValue CallSeqStart,
5444                                           const CallBase *CB, const SDLoc &dl,
5445                                           bool hasNest,
5446                                           const PPCSubtarget &Subtarget) {
5447   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5448   // entry point, but to the function descriptor (the function entry point
5449   // address is part of the function descriptor though).
5450   // The function descriptor is a three doubleword structure with the
5451   // following fields: function entry point, TOC base address and
5452   // environment pointer.
5453   // Thus for a call through a function pointer, the following actions need
5454   // to be performed:
5455   //   1. Save the TOC of the caller in the TOC save area of its stack
5456   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5457   //   2. Load the address of the function entry point from the function
5458   //      descriptor.
5459   //   3. Load the TOC of the callee from the function descriptor into r2.
5460   //   4. Load the environment pointer from the function descriptor into
5461   //      r11.
5462   //   5. Branch to the function entry point address.
5463   //   6. On return of the callee, the TOC of the caller needs to be
5464   //      restored (this is done in FinishCall()).
5465   //
5466   // The loads are scheduled at the beginning of the call sequence, and the
5467   // register copies are flagged together to ensure that no other
5468   // operations can be scheduled in between. E.g. without flagging the
5469   // copies together, a TOC access in the caller could be scheduled between
5470   // the assignment of the callee TOC and the branch to the callee, which leads
5471   // to incorrect code.
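  //
  // For illustration only (not part of lowering), a 64-bit ELFv1 descriptor
  // can be pictured as:
  //   struct FunctionDescriptor {
  //     void *EntryPoint;  // offset 0, loaded first below
  //     void *TOCBase;     // at descriptorTOCAnchorOffset() (8 on PPC64)
  //     void *EnvPointer;  // at descriptorEnvironmentPointerOffset() (16)
  //   };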
5472 
5473   // Start by loading the function address from the descriptor.
5474   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5475   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5476                       ? (MachineMemOperand::MODereferenceable |
5477                          MachineMemOperand::MOInvariant)
5478                       : MachineMemOperand::MONone;
5479 
5480   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5481 
5482   // Registers used in building the DAG.
5483   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5484   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5485 
5486   // Offsets of descriptor members.
5487   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5488   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5489 
5490   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5491   const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5492 
  // One load for the function's entry point address.
5494   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5495                                     Alignment, MMOFlags);
5496 
5497   // One for loading the TOC anchor for the module that contains the called
5498   // function.
5499   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5500   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5501   SDValue TOCPtr =
5502       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5503                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5504 
5505   // One for loading the environment pointer.
5506   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5507   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5508   SDValue LoadEnvPtr =
5509       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5510                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5511 
5513   // Then copy the newly loaded TOC anchor to the TOC pointer.
5514   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5515   Chain = TOCVal.getValue(0);
5516   Glue = TOCVal.getValue(1);
5517 
5518   // If the function call has an explicit 'nest' parameter, it takes the
5519   // place of the environment pointer.
5520   assert((!hasNest || !Subtarget.isAIXABI()) &&
5521          "Nest parameter is not supported on AIX.");
5522   if (!hasNest) {
5523     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5524     Chain = EnvVal.getValue(0);
5525     Glue = EnvVal.getValue(1);
5526   }
5527 
5528   // The rest of the indirect call sequence is the same as the non-descriptor
5529   // DAG.
5530   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5531 }
5532 
5533 static void
5534 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5535                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5536                   SelectionDAG &DAG,
5537                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5538                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5539                   const PPCSubtarget &Subtarget) {
5540   const bool IsPPC64 = Subtarget.isPPC64();
5541   // MVT for a general purpose register.
5542   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5543 
5544   // First operand is always the chain.
5545   Ops.push_back(Chain);
5546 
5547   // If it's a direct call pass the callee as the second operand.
5548   if (!CFlags.IsIndirect)
5549     Ops.push_back(Callee);
5550   else {
5551     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5552 
5553     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5554     // on the stack (this would have been done in `LowerCall_64SVR4` or
5555     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5556     // represents both the indirect branch and a load that restores the TOC
5557     // pointer from the linkage area. The operand for the TOC restore is an add
5558     // of the TOC save offset to the stack pointer. This must be the second
5559     // operand: after the chain input but before any other variadic arguments.
5560     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5561     // saved or used.
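    // Sketch (illustrative only) of the resulting sequence for the TOC-based
    // ABIs:
    //   bctrl                    // indirect branch and link
    //   ld 2, TOCSaveOffset(1)   // reload the caller's TOC pointer, using
    //                            // the AddTOC operand built below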
5562     if (isTOCSaveRestoreRequired(Subtarget)) {
5563       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5564 
5565       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5566       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5567       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5568       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5569       Ops.push_back(AddTOC);
5570     }
5571 
5572     // Add the register used for the environment pointer.
5573     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5574       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5575                                     RegVT));
5576 
5578     // Add CTR register as callee so a bctr can be emitted later.
5579     if (CFlags.IsTailCall)
5580       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5581   }
5582 
5583   // If this is a tail call add stack pointer delta.
5584   if (CFlags.IsTailCall)
5585     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5586 
5587   // Add argument registers to the end of the list so that they are known live
5588   // into the call.
5589   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5590     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5591                                   RegsToPass[i].second.getValueType()));
5592 
5593   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5594   // no way to mark dependencies as implicit here.
5595   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5596   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5597        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5598     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5599 
5600   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5601   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5602     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5603 
5604   // Add a register mask operand representing the call-preserved registers.
5605   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5606   const uint32_t *Mask =
5607       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5608   assert(Mask && "Missing call preserved mask for calling convention");
5609   Ops.push_back(DAG.getRegisterMask(Mask));
5610 
5611   // If the glue is valid, it is the last operand.
5612   if (Glue.getNode())
5613     Ops.push_back(Glue);
5614 }
5615 
5616 SDValue PPCTargetLowering::FinishCall(
5617     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5618     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5619     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5620     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5621     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5622 
5623   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5624       Subtarget.isAIXABI())
5625     setUsesTOCBasePtr(DAG);
5626 
5627   unsigned CallOpc =
5628       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5629                     Subtarget, DAG.getTarget());
5630 
5631   if (!CFlags.IsIndirect)
5632     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5633   else if (Subtarget.usesFunctionDescriptors())
5634     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5635                                   dl, CFlags.HasNest, Subtarget);
5636   else
5637     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5638 
5639   // Build the operand list for the call instruction.
5640   SmallVector<SDValue, 8> Ops;
5641   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5642                     SPDiff, Subtarget);
5643 
5644   // Emit tail call.
5645   if (CFlags.IsTailCall) {
    // Indirect tail calls when using PC Relative calls do not have the same
    // constraints.
5648     assert(((Callee.getOpcode() == ISD::Register &&
5649              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5650             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5651             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5652             isa<ConstantSDNode>(Callee) ||
5653             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5654            "Expecting a global address, external symbol, absolute value, "
5655            "register or an indirect tail call when PC Relative calls are "
5656            "used.");
5657     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5658     assert(CallOpc == PPCISD::TC_RETURN &&
5659            "Unexpected call opcode for a tail call.");
5660     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5661     return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5662   }
5663 
5664   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5665   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5666   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5667   Glue = Chain.getValue(1);
5668 
5669   // When performing tail call optimization the callee pops its arguments off
5670   // the stack. Account for this here so these bytes can be pushed back on in
5671   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5672   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5673                          getTargetMachine().Options.GuaranteedTailCallOpt)
5674                             ? NumBytes
5675                             : 0;
5676 
5677   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5678                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5679                              Glue, dl);
5680   Glue = Chain.getValue(1);
5681 
5682   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5683                          DAG, InVals);
5684 }
5685 
5686 SDValue
5687 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5688                              SmallVectorImpl<SDValue> &InVals) const {
5689   SelectionDAG &DAG                     = CLI.DAG;
5690   SDLoc &dl                             = CLI.DL;
5691   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5692   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5693   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5694   SDValue Chain                         = CLI.Chain;
5695   SDValue Callee                        = CLI.Callee;
5696   bool &isTailCall                      = CLI.IsTailCall;
5697   CallingConv::ID CallConv              = CLI.CallConv;
5698   bool isVarArg                         = CLI.IsVarArg;
5699   bool isPatchPoint                     = CLI.IsPatchPoint;
5700   const CallBase *CB                    = CLI.CB;
5701 
5702   if (isTailCall) {
5703     if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5704       isTailCall = false;
5705     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5706       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5707           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5708     else
5709       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5710                                                      Ins, DAG);
5711     if (isTailCall) {
5712       ++NumTailCalls;
5713       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5714         ++NumSiblingCalls;
5715 
5716       // PC Relative calls no longer guarantee that the callee is a Global
5717       // Address Node. The callee could be an indirect tail call in which
5718       // case the SDValue for the callee could be a load (to load the address
5719       // of a function pointer) or it may be a register copy (to move the
5720       // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5722       assert((Subtarget.isUsingPCRelativeCalls() ||
5723               isa<GlobalAddressSDNode>(Callee)) &&
5724              "Callee should be an llvm::Function object.");
5725 
5726       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5727                         << "\nTCO callee: ");
5728       LLVM_DEBUG(Callee.dump());
5729     }
5730   }
5731 
5732   if (!isTailCall && CB && CB->isMustTailCall())
5733     report_fatal_error("failed to perform tail call elimination on a call "
5734                        "site marked musttail");
5735 
  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via a function pointer. If we have a function name, first translate
  // it into a pointer.
5739   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5740       !isTailCall)
5741     Callee = LowerGlobalAddress(Callee, DAG);
5742 
5743   CallFlags CFlags(
5744       CallConv, isTailCall, isVarArg, isPatchPoint,
5745       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5746       // hasNest
5747       Subtarget.is64BitELFABI() &&
5748           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5749       CLI.NoMerge);
5750 
5751   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5752     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5753                             InVals, CB);
5754 
5755   if (Subtarget.isSVR4ABI())
5756     return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5757                             InVals, CB);
5758 
5759   if (Subtarget.isAIXABI())
5760     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5761                          InVals, CB);
5762 
5763   return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5764                           InVals, CB);
5765 }
5766 
5767 SDValue PPCTargetLowering::LowerCall_32SVR4(
5768     SDValue Chain, SDValue Callee, CallFlags CFlags,
5769     const SmallVectorImpl<ISD::OutputArg> &Outs,
5770     const SmallVectorImpl<SDValue> &OutVals,
5771     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5772     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5773     const CallBase *CB) const {
5774   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5775   // of the 32-bit SVR4 ABI stack frame layout.
5776 
5777   const CallingConv::ID CallConv = CFlags.CallConv;
5778   const bool IsVarArg = CFlags.IsVarArg;
5779   const bool IsTailCall = CFlags.IsTailCall;
5780 
5781   assert((CallConv == CallingConv::C ||
5782           CallConv == CallingConv::Cold ||
5783           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5784 
5785   const Align PtrAlign(4);
5786 
5787   MachineFunction &MF = DAG.getMachineFunction();
5788 
  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic allocations and
  // for restoring the caller's stack pointer in this function's epilog. This
  // is done because the tail-called function might overwrite the value in
  // this function's (MF) stack pointer stack slot 0(SP).
5794   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5795       CallConv == CallingConv::Fast)
5796     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5797 
5798   // Count how many bytes are to be pushed on the stack, including the linkage
5799   // area, parameter list area and the part of the local variable space which
5800   // contains copies of aggregates which are passed by value.
5801 
5802   // Assign locations to all of the outgoing arguments.
5803   SmallVector<CCValAssign, 16> ArgLocs;
5804   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5805 
5806   // Reserve space for the linkage area on the stack.
5807   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5808                        PtrAlign);
5809   if (useSoftFloat())
5810     CCInfo.PreAnalyzeCallOperands(Outs);
5811 
5812   if (IsVarArg) {
5813     // Handle fixed and variable vector arguments differently.
5814     // Fixed vector arguments go into registers as long as registers are
5815     // available. Variable vector arguments always go into memory.
5816     unsigned NumArgs = Outs.size();
5817 
5818     for (unsigned i = 0; i != NumArgs; ++i) {
5819       MVT ArgVT = Outs[i].VT;
5820       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5821       bool Result;
5822 
5823       if (Outs[i].IsFixed) {
5824         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5825                                CCInfo);
5826       } else {
5827         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5828                                       ArgFlags, CCInfo);
5829       }
5830 
5831       if (Result) {
5832 #ifndef NDEBUG
5833         errs() << "Call operand #" << i << " has unhandled type "
5834              << EVT(ArgVT).getEVTString() << "\n";
5835 #endif
5836         llvm_unreachable(nullptr);
5837       }
5838     }
5839   } else {
5840     // All arguments are treated the same.
5841     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5842   }
5843   CCInfo.clearWasPPCF128();
5844 
5845   // Assign locations to all of the outgoing aggregate by value arguments.
5846   SmallVector<CCValAssign, 16> ByValArgLocs;
5847   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5848 
5849   // Reserve stack space for the allocations in CCInfo.
5850   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5851 
5852   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5853 
  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
5857   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5858 
5859   // Calculate by how many bytes the stack has to be adjusted in case of tail
5860   // call optimization.
5861   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5862 
5863   // Adjust the stack pointer for the new arguments...
5864   // These operations are automatically eliminated by the prolog/epilog pass
5865   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5866   SDValue CallSeqStart = Chain;
5867 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
5870   SDValue LROp, FPOp;
5871   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5872 
5873   // Set up a copy of the stack pointer for use loading and storing any
5874   // arguments that may not fit in the registers available for argument
5875   // passing.
5876   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5877 
5878   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5879   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5880   SmallVector<SDValue, 8> MemOpChains;
5881 
5882   bool seenFloatArg = false;
5883   // Walk the register/memloc assignments, inserting copies/loads.
5884   // i - Tracks the index into the list of registers allocated for the call
5885   // RealArgIdx - Tracks the index into the list of actual function arguments
5886   // j - Tracks the index into the list of byval arguments
5887   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5888        i != e;
5889        ++i, ++RealArgIdx) {
5890     CCValAssign &VA = ArgLocs[i];
5891     SDValue Arg = OutVals[RealArgIdx];
5892     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5893 
5894     if (Flags.isByVal()) {
5895       // Argument is an aggregate which is passed by value, thus we need to
5896       // create a copy of it in the local variable space of the current stack
5897       // frame (which is the stack frame of the caller) and pass the address of
5898       // this copy to the callee.
5899       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5900       CCValAssign &ByValVA = ByValArgLocs[j++];
5901       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5902 
      // Memory reserved in the local variable space of the caller's stack frame.
5904       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5905 
5906       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5907       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5908                            StackPtr, PtrOff);
5909 
5910       // Create a copy of the argument in the local area of the current
5911       // stack frame.
5912       SDValue MemcpyCall =
5913         CreateCopyOfByValArgument(Arg, PtrOff,
5914                                   CallSeqStart.getNode()->getOperand(0),
5915                                   Flags, DAG, dl);
5916 
5917       // This must go outside the CALLSEQ_START..END.
5918       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5919                                                      SDLoc(MemcpyCall));
5920       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5921                              NewCallSeqStart.getNode());
5922       Chain = CallSeqStart = NewCallSeqStart;
5923 
5924       // Pass the address of the aggregate copy on the stack either in a
5925       // physical register or in the parameter list area of the current stack
5926       // frame to the callee.
5927       Arg = PtrOff;
5928     }
5929 
    // When useCRBits() is true, there can be i1 arguments.
    // This is because getRegisterType(MVT::i1) => MVT::i1,
    // while for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure the callee will get an i32.
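    // E.g. a `bool` argument is widened here to a 32-bit value, sign- or
    // zero-extended according to its signext/zeroext attribute, before being
    // placed in a GPR or in the parameter area below.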
5934     if (Arg.getValueType() == MVT::i1)
5935       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5936                         dl, MVT::i32, Arg);
5937 
5938     if (VA.isRegLoc()) {
5939       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5940       // Put argument in a physical register.
5941       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5942         bool IsLE = Subtarget.isLittleEndian();
5943         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5944                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5945         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5946         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5947                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5948         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5949                              SVal.getValue(0)));
5950       } else
5951         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5952     } else {
5953       // Put argument in the parameter list area of the current stack frame.
5954       assert(VA.isMemLoc());
5955       unsigned LocMemOffset = VA.getLocMemOffset();
5956 
5957       if (!IsTailCall) {
5958         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5959         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5960                              StackPtr, PtrOff);
5961 
5962         MemOpChains.push_back(
5963             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5964       } else {
5965         // Calculate and remember argument location.
5966         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5967                                  TailCallArguments);
5968       }
5969     }
5970   }
5971 
5972   if (!MemOpChains.empty())
5973     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5974 
5975   // Build a sequence of copy-to-reg nodes chained together with token chain
5976   // and flag operands which copy the outgoing args into the appropriate regs.
5977   SDValue InFlag;
5978   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5979     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5980                              RegsToPass[i].second, InFlag);
5981     InFlag = Chain.getValue(1);
5982   }
5983 
5984   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5985   // registers.
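  // (The 32-bit SVR4 ABI uses CR bit 6 to tell a varargs callee whether any
  // floating-point arguments were passed in FP registers, so the callee knows
  // whether it must save them.)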
5986   if (IsVarArg) {
5987     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5988     SDValue Ops[] = { Chain, InFlag };
5989 
5990     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5991                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5992 
5993     InFlag = Chain.getValue(1);
5994   }
5995 
5996   if (IsTailCall)
5997     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5998                     TailCallArguments);
5999 
6000   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6001                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6002 }
6003 
6004 // Copy an argument into memory, being careful to do this outside the
6005 // call sequence for the call to which the argument belongs.
6006 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6007     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6008     SelectionDAG &DAG, const SDLoc &dl) const {
6009   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6010                         CallSeqStart.getNode()->getOperand(0),
6011                         Flags, DAG, dl);
6012   // The MEMCPY must go outside the CALLSEQ_START..END.
6013   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6014   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6015                                                  SDLoc(MemcpyCall));
6016   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6017                          NewCallSeqStart.getNode());
6018   return NewCallSeqStart;
6019 }
6020 
6021 SDValue PPCTargetLowering::LowerCall_64SVR4(
6022     SDValue Chain, SDValue Callee, CallFlags CFlags,
6023     const SmallVectorImpl<ISD::OutputArg> &Outs,
6024     const SmallVectorImpl<SDValue> &OutVals,
6025     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6026     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6027     const CallBase *CB) const {
6028   bool isELFv2ABI = Subtarget.isELFv2ABI();
6029   bool isLittleEndian = Subtarget.isLittleEndian();
6030   unsigned NumOps = Outs.size();
6031   bool IsSibCall = false;
6032   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6033 
6034   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6035   unsigned PtrByteSize = 8;
6036 
6037   MachineFunction &MF = DAG.getMachineFunction();
6038 
6039   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6040     IsSibCall = true;
6041 
  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic allocations and
  // for restoring the caller's stack pointer in this function's epilog. This
  // is done because the tail-called function might overwrite the value in
  // this function's (MF) stack pointer stack slot 0(SP).
6047   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6048     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6049 
6050   assert(!(IsFastCall && CFlags.IsVarArg) &&
6051          "fastcc not supported on varargs functions");
6052 
6053   // Count how many bytes are to be pushed on the stack, including the linkage
6054   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
6055   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6056   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6057   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6058   unsigned NumBytes = LinkageSize;
6059   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6060 
6061   static const MCPhysReg GPR[] = {
6062     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6063     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6064   };
6065   static const MCPhysReg VR[] = {
6066     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6067     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6068   };
6069 
6070   const unsigned NumGPRs = array_lengthof(GPR);
6071   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6072   const unsigned NumVRs  = array_lengthof(VR);
6073 
6074   // On ELFv2, we can avoid allocating the parameter area if all the arguments
6075   // can be passed to the callee in registers.
6076   // For the fast calling convention, there is another check below.
  // Note: We should keep this consistent with LowerFormalArguments_64SVR4().
6078   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6079   if (!HasParameterArea) {
6080     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6081     unsigned AvailableFPRs = NumFPRs;
6082     unsigned AvailableVRs = NumVRs;
6083     unsigned NumBytesTmp = NumBytes;
6084     for (unsigned i = 0; i != NumOps; ++i) {
6085       if (Outs[i].Flags.isNest()) continue;
6086       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6087                                  PtrByteSize, LinkageSize, ParamAreaSize,
6088                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
6089         HasParameterArea = true;
6090     }
6091   }
6092 
6093   // When using the fast calling convention, we don't provide backing for
6094   // arguments that will be in registers.
6095   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6096 
6097   // Avoid allocating parameter area for fastcc functions if all the arguments
6098   // can be passed in the registers.
6099   if (IsFastCall)
6100     HasParameterArea = false;
6101 
6102   // Add up all the space actually used.
6103   for (unsigned i = 0; i != NumOps; ++i) {
6104     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6105     EVT ArgVT = Outs[i].VT;
6106     EVT OrigVT = Outs[i].ArgVT;
6107 
6108     if (Flags.isNest())
6109       continue;
6110 
6111     if (IsFastCall) {
6112       if (Flags.isByVal()) {
6113         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6114         if (NumGPRsUsed > NumGPRs)
6115           HasParameterArea = true;
6116       } else {
6117         switch (ArgVT.getSimpleVT().SimpleTy) {
6118         default: llvm_unreachable("Unexpected ValueType for argument!");
6119         case MVT::i1:
6120         case MVT::i32:
6121         case MVT::i64:
6122           if (++NumGPRsUsed <= NumGPRs)
6123             continue;
6124           break;
6125         case MVT::v4i32:
6126         case MVT::v8i16:
6127         case MVT::v16i8:
6128         case MVT::v2f64:
6129         case MVT::v2i64:
6130         case MVT::v1i128:
6131         case MVT::f128:
6132           if (++NumVRsUsed <= NumVRs)
6133             continue;
6134           break;
6135         case MVT::v4f32:
6136           if (++NumVRsUsed <= NumVRs)
6137             continue;
6138           break;
6139         case MVT::f32:
6140         case MVT::f64:
6141           if (++NumFPRsUsed <= NumFPRs)
6142             continue;
6143           break;
6144         }
6145         HasParameterArea = true;
6146       }
6147     }
6148 
6149     /* Respect alignment of argument on the stack.  */
    auto Alignment =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = alignTo(NumBytes, Alignment);
6153 
6154     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6155     if (Flags.isInConsecutiveRegsLast())
6156       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6157   }
6158 
6159   unsigned NumBytesActuallyUsed = NumBytes;
6160 
  // In the old ELFv1 ABI, the prolog code of the callee may store up to 8 GPR
  // argument registers to the stack, allowing va_start to index over them in
  // memory if the callee is varargs. Because we cannot tell if this is needed
  // on the caller side, we have to conservatively assume that it is needed.
  // As such, make sure we have at least enough stack space for the caller to
  // store the 8 GPRs.
  // In the ELFv2 ABI, we only allocate the parameter area if a callee really
  // requires memory operands, e.g. a vararg function.
6169   if (HasParameterArea)
6170     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6171   else
6172     NumBytes = LinkageSize;
6173 
6174   // Tail call needs the stack to be aligned.
6175   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6176     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6177 
6178   int SPDiff = 0;
6179 
6180   // Calculate by how many bytes the stack has to be adjusted in case of tail
6181   // call optimization.
6182   if (!IsSibCall)
6183     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6184 
6185   // To protect arguments on the stack from being clobbered in a tail call,
6186   // force all the loads to happen before doing any other lowering.
6187   if (CFlags.IsTailCall)
6188     Chain = DAG.getStackArgumentTokenFactor(Chain);
6189 
6190   // Adjust the stack pointer for the new arguments...
6191   // These operations are automatically eliminated by the prolog/epilog pass
6192   if (!IsSibCall)
6193     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6194   SDValue CallSeqStart = Chain;
6195 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
6198   SDValue LROp, FPOp;
6199   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6200 
6201   // Set up a copy of the stack pointer for use loading and storing any
6202   // arguments that may not fit in the registers available for argument
6203   // passing.
6204   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6205 
6206   // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point arguments
6208   // must be stored to our stack, and loaded into integer regs as well, if
6209   // any integer regs are available for argument passing.
6210   unsigned ArgOffset = LinkageSize;
6211 
6212   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6213   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6214 
6215   SmallVector<SDValue, 8> MemOpChains;
6216   for (unsigned i = 0; i != NumOps; ++i) {
6217     SDValue Arg = OutVals[i];
6218     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6219     EVT ArgVT = Outs[i].VT;
6220     EVT OrigVT = Outs[i].ArgVT;
6221 
6222     // PtrOff will be used to store the current argument to the stack if a
6223     // register cannot be found for it.
6224     SDValue PtrOff;
6225 
    // We re-align the argument offset for each argument, except when using
    // the fast calling convention, where we do so only when the argument
    // will actually use a stack slot.
6229     auto ComputePtrOff = [&]() {
6230       /* Respect alignment of argument on the stack.  */
6231       auto Alignment =
6232           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6233       ArgOffset = alignTo(ArgOffset, Alignment);
6234 
6235       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6236 
6237       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6238     };
6239 
6240     if (!IsFastCall) {
6241       ComputePtrOff();
6242 
6243       /* Compute GPR index associated with argument offset.  */
6244       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6245       GPR_idx = std::min(GPR_idx, NumGPRs);
6246     }
6247 
6248     // Promote integers to 64-bit values.
6249     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6250       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6251       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6252       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6253     }
6254 
6255     // FIXME memcpy is used way more than necessary.  Correctness first.
6256     // Note: "by value" is code for passing a structure by value, not
6257     // basic types.
6258     if (Flags.isByVal()) {
6259       // Note: Size includes alignment padding, so
6260       //   struct x { short a; char b; }
6261       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6262       // These are the proper values we need for right-justifying the
6263       // aggregate in a parameter register.
6264       unsigned Size = Flags.getByValSize();
6265 
6266       // An empty aggregate parameter takes up no storage and no
6267       // registers.
6268       if (Size == 0)
6269         continue;
6270 
6271       if (IsFastCall)
6272         ComputePtrOff();
6273 
6274       // All aggregates smaller than 8 bytes must be passed right-justified.
6275       if (Size==1 || Size==2 || Size==4) {
6276         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6277         if (GPR_idx != NumGPRs) {
6278           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6279                                         MachinePointerInfo(), VT);
6280           MemOpChains.push_back(Load.getValue(1));
6281           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6282 
6283           ArgOffset += PtrByteSize;
6284           continue;
6285         }
6286       }
6287 
6288       if (GPR_idx == NumGPRs && Size < 8) {
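        // No GPR is left for this small aggregate; memcpy it into its
        // parameter save slot, right-justified within the doubleword on
        // big-endian targets.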
6289         SDValue AddPtr = PtrOff;
6290         if (!isLittleEndian) {
6291           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6292                                           PtrOff.getValueType());
6293           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6294         }
6295         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6296                                                           CallSeqStart,
6297                                                           Flags, DAG, dl);
6298         ArgOffset += PtrByteSize;
6299         continue;
6300       }
6301       // Copy entire object into memory.  There are cases where gcc-generated
6302       // code assumes it is there, even if it could be put entirely into
6303       // registers.  (This is not what the doc says.)
6304 
6305       // FIXME: The above statement is likely due to a misunderstanding of the
6306       // documents.  All arguments must be copied into the parameter area BY
6307       // THE CALLEE in the event that the callee takes the address of any
6308       // formal argument.  That has not yet been implemented.  However, it is
6309       // reasonable to use the stack area as a staging area for the register
6310       // load.
6311 
6312       // Skip this for small aggregates, as we will use the same slot for a
6313       // right-justified copy, below.
6314       if (Size >= 8)
6315         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6316                                                           CallSeqStart,
6317                                                           Flags, DAG, dl);
6318 
6319       // When a register is available, pass a small aggregate right-justified.
6320       if (Size < 8 && GPR_idx != NumGPRs) {
6321         // The easiest way to get this right-justified in a register
6322         // is to copy the structure into the rightmost portion of a
6323         // local variable slot, then load the whole slot into the
6324         // register.
6325         // FIXME: The memcpy seems to produce pretty awful code for
6326         // small aggregates, particularly for packed ones.
6327         // FIXME: It would be preferable to use the slot in the
6328         // parameter save area instead of a new local variable.
6329         SDValue AddPtr = PtrOff;
6330         if (!isLittleEndian) {
6331           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6332           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6333         }
6334         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6335                                                           CallSeqStart,
6336                                                           Flags, DAG, dl);
6337 
6338         // Load the slot into the register.
6339         SDValue Load =
6340             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6341         MemOpChains.push_back(Load.getValue(1));
6342         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6343 
6344         // Done with this argument.
6345         ArgOffset += PtrByteSize;
6346         continue;
6347       }
6348 
6349       // For aggregates larger than PtrByteSize, copy the pieces of the
6350       // object that fit into registers from the parameter save area.
6351       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6352         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6353         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6354         if (GPR_idx != NumGPRs) {
6355           SDValue Load =
6356               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6357           MemOpChains.push_back(Load.getValue(1));
6358           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6359           ArgOffset += PtrByteSize;
6360         } else {
6361           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6362           break;
6363         }
6364       }
6365       continue;
6366     }
6367 
6368     switch (Arg.getSimpleValueType().SimpleTy) {
6369     default: llvm_unreachable("Unexpected ValueType for argument!");
6370     case MVT::i1:
6371     case MVT::i32:
6372     case MVT::i64:
6373       if (Flags.isNest()) {
6374         // The 'nest' parameter, if any, is passed in R11.
6375         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6376         break;
6377       }
6378 
6379       // These can be scalar arguments or elements of an integer array type
6380       // passed directly.  Clang may use those instead of "byval" aggregate
6381       // types to avoid forcing arguments to memory unnecessarily.
6382       if (GPR_idx != NumGPRs) {
6383         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6384       } else {
6385         if (IsFastCall)
6386           ComputePtrOff();
6387 
6388         assert(HasParameterArea &&
6389                "Parameter area must exist to pass an argument in memory.");
6390         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6391                          true, CFlags.IsTailCall, false, MemOpChains,
6392                          TailCallArguments, dl);
6393         if (IsFastCall)
6394           ArgOffset += PtrByteSize;
6395       }
6396       if (!IsFastCall)
6397         ArgOffset += PtrByteSize;
6398       break;
6399     case MVT::f32:
6400     case MVT::f64: {
6401       // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
6403       // float aggregates.
6404 
6405       // Named arguments go into FPRs first, and once they overflow, the
6406       // remaining arguments go into GPRs and then the parameter save area.
6407       // Unnamed arguments for vararg functions always go to GPRs and
6408       // then the parameter save area.  For now, put all arguments to vararg
6409       // routines always in both locations (FPR *and* GPR or stack slot).
6410       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6411       bool NeededLoad = false;
6412 
6413       // First load the argument into the next available FPR.
6414       if (FPR_idx != NumFPRs)
6415         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6416 
6417       // Next, load the argument into GPR or stack slot if needed.
6418       if (!NeedGPROrStack)
6419         ;
6420       else if (GPR_idx != NumGPRs && !IsFastCall) {
6421         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6422         // once we support fp <-> gpr moves.
6423 
6424         // In the non-vararg case, this can only ever happen in the
6425         // presence of f32 array types, since otherwise we never run
6426         // out of FPRs before running out of GPRs.
6427         SDValue ArgVal;
6428 
6429         // Double values are always passed in a single GPR.
6430         if (Arg.getValueType() != MVT::f32) {
6431           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6432 
6433         // Non-array float values are extended and passed in a GPR.
6434         } else if (!Flags.isInConsecutiveRegs()) {
6435           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6436           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6437 
6438         // If we have an array of floats, we collect every odd element
6439         // together with its predecessor into one GPR.
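        // On big-endian targets the two words are swapped so that the earlier
        // element ends up in the more significant half, matching its position
        // in memory.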
6440         } else if (ArgOffset % PtrByteSize != 0) {
6441           SDValue Lo, Hi;
6442           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6443           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6444           if (!isLittleEndian)
6445             std::swap(Lo, Hi);
6446           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6447 
6448         // The final element, if even, goes into the first half of a GPR.
6449         } else if (Flags.isInConsecutiveRegsLast()) {
6450           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6451           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6452           if (!isLittleEndian)
6453             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6454                                  DAG.getConstant(32, dl, MVT::i32));
6455 
6456         // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
6458         } else
6459           ArgVal = SDValue();
6460 
6461         if (ArgVal.getNode())
6462           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6463       } else {
6464         if (IsFastCall)
6465           ComputePtrOff();
6466 
6467         // Single-precision floating-point values are mapped to the
6468         // second (rightmost) word of the stack doubleword.
6469         if (Arg.getValueType() == MVT::f32 &&
6470             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6471           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6472           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6473         }
6474 
6475         assert(HasParameterArea &&
6476                "Parameter area must exist to pass an argument in memory.");
6477         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6478                          true, CFlags.IsTailCall, false, MemOpChains,
6479                          TailCallArguments, dl);
6480 
6481         NeededLoad = true;
6482       }
6483       // When passing an array of floats, the array occupies consecutive
6484       // space in the argument area; only round up to the next doubleword
6485       // at the end of the array.  Otherwise, each float takes 8 bytes.
6486       if (!IsFastCall || NeededLoad) {
6487         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6488                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6489         if (Flags.isInConsecutiveRegsLast())
6490           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6491       }
6492       break;
6493     }
6494     case MVT::v4f32:
6495     case MVT::v4i32:
6496     case MVT::v8i16:
6497     case MVT::v16i8:
6498     case MVT::v2f64:
6499     case MVT::v2i64:
6500     case MVT::v1i128:
6501     case MVT::f128:
6502       // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
6504       // vector aggregates.
6505 
6506       // For a varargs call, named arguments go into VRs or on the stack as
6507       // usual; unnamed arguments always go to the stack or the corresponding
6508       // GPRs when within range.  For now, we always put the value in both
6509       // locations (or even all three).
6510       if (CFlags.IsVarArg) {
6511         assert(HasParameterArea &&
6512                "Parameter area must exist if we have a varargs call.");
6513         // We could elide this store in the case where the object fits
6514         // entirely in R registers.  Maybe later.
6515         SDValue Store =
6516             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6517         MemOpChains.push_back(Store);
6518         if (VR_idx != NumVRs) {
6519           SDValue Load =
6520               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6521           MemOpChains.push_back(Load.getValue(1));
6522           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6523         }
6524         ArgOffset += 16;
6525         for (unsigned i=0; i<16; i+=PtrByteSize) {
6526           if (GPR_idx == NumGPRs)
6527             break;
6528           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6529                                    DAG.getConstant(i, dl, PtrVT));
6530           SDValue Load =
6531               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6532           MemOpChains.push_back(Load.getValue(1));
6533           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6534         }
6535         break;
6536       }
6537 
6538       // Non-varargs Altivec params go into VRs or on the stack.
6539       if (VR_idx != NumVRs) {
6540         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6541       } else {
6542         if (IsFastCall)
6543           ComputePtrOff();
6544 
6545         assert(HasParameterArea &&
6546                "Parameter area must exist to pass an argument in memory.");
6547         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6548                          true, CFlags.IsTailCall, true, MemOpChains,
6549                          TailCallArguments, dl);
6550         if (IsFastCall)
6551           ArgOffset += 16;
6552       }
6553 
6554       if (!IsFastCall)
6555         ArgOffset += 16;
6556       break;
6557     }
6558   }
6559 
6560   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6561          "mismatch in size of parameter area");
6562   (void)NumBytesActuallyUsed;
6563 
6564   if (!MemOpChains.empty())
6565     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6566 
6567   // Check if this is an indirect call (MTCTR/BCTRL).
6568   // See prepareDescriptorIndirectCall and buildCallOperands for more
6569   // information about calls through function pointers in the 64-bit SVR4 ABI.
6570   if (CFlags.IsIndirect) {
6571     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6572     // caller in the TOC save area.
6573     if (isTOCSaveRestoreRequired(Subtarget)) {
      assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6575       // Load r2 into a virtual register and store it to the TOC save area.
6576       setUsesTOCBasePtr(DAG);
6577       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6578       // TOC save area offset.
6579       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6580       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6581       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6582       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6583                            MachinePointerInfo::getStack(
6584                                DAG.getMachineFunction(), TOCSaveOffset));
6585     }
6586     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6587     // This does not mean the MTCTR instruction must use R12; it's easier
6588     // to model this as an extra parameter, so do that.
6589     if (isELFv2ABI && !CFlags.IsPatchPoint)
6590       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6591   }
6592 
6593   // Build a sequence of copy-to-reg nodes chained together with token chain
6594   // and flag operands which copy the outgoing args into the appropriate regs.
6595   SDValue InFlag;
6596   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6597     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6598                              RegsToPass[i].second, InFlag);
6599     InFlag = Chain.getValue(1);
6600   }
6601 
6602   if (CFlags.IsTailCall && !IsSibCall)
6603     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6604                     TailCallArguments);
6605 
6606   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6607                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6608 }
6609 
6610 SDValue PPCTargetLowering::LowerCall_Darwin(
6611     SDValue Chain, SDValue Callee, CallFlags CFlags,
6612     const SmallVectorImpl<ISD::OutputArg> &Outs,
6613     const SmallVectorImpl<SDValue> &OutVals,
6614     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6615     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6616     const CallBase *CB) const {
6617   unsigned NumOps = Outs.size();
6618 
6619   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6620   bool isPPC64 = PtrVT == MVT::i64;
6621   unsigned PtrByteSize = isPPC64 ? 8 : 4;
6622 
6623   MachineFunction &MF = DAG.getMachineFunction();
6624 
  // Mark this function as potentially containing a tail call. As a
  // consequence, the frame pointer will be used for dynamic alloca and for
  // restoring the caller's stack pointer in this function's epilogue. This is
  // done because the tail-called function might overwrite the value in this
  // function's (MF) stack pointer stack slot 0(SP).
6630   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6631       CFlags.CallConv == CallingConv::Fast)
6632     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6633 
6634   // Count how many bytes are to be pushed on the stack, including the linkage
6635   // area, and parameter passing area.  We start with 24/48 bytes, which is
6636   // prereserved space for [SP][CR][LR][3 x unused].
6637   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6638   unsigned NumBytes = LinkageSize;
6639 
6640   // Add up all the space actually used.
6641   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6642   // they all go in registers, but we must reserve stack space for them for
6643   // possible use by the caller.  In varargs or 64-bit calls, parameters are
6644   // assigned stack space in order, with padding so Altivec parameters are
6645   // 16-byte aligned.
6646   unsigned nAltivecParamsAtEnd = 0;
6647   for (unsigned i = 0; i != NumOps; ++i) {
6648     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6649     EVT ArgVT = Outs[i].VT;
6650     // Varargs Altivec parameters are padded to a 16 byte boundary.
6651     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6652         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6653         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6654       if (!CFlags.IsVarArg && !isPPC64) {
6655         // Non-varargs Altivec parameters go after all the non-Altivec
6656         // parameters; handle those later so we know how much padding we need.
6657         nAltivecParamsAtEnd++;
6658         continue;
6659       }
6660       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6661       NumBytes = ((NumBytes+15)/16)*16;
6662     }
6663     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6664   }
6665 
6666   // Allow for Altivec parameters at the end, if needed.
6667   if (nAltivecParamsAtEnd) {
6668     NumBytes = ((NumBytes+15)/16)*16;
6669     NumBytes += 16*nAltivecParamsAtEnd;
6670   }
6671 
6672   // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it's varargs.
6674   // Because we cannot tell if this is needed on the caller side, we have to
6675   // conservatively assume that it is needed.  As such, make sure we have at
6676   // least enough stack space for the caller to store the 8 GPRs.
6677   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6678 
6679   // Tail call needs the stack to be aligned.
6680   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6681       CFlags.CallConv == CallingConv::Fast)
6682     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6683 
6684   // Calculate by how many bytes the stack has to be adjusted in case of tail
6685   // call optimization.
6686   int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6687 
6688   // To protect arguments on the stack from being clobbered in a tail call,
6689   // force all the loads to happen before doing any other lowering.
6690   if (CFlags.IsTailCall)
6691     Chain = DAG.getStackArgumentTokenFactor(Chain);
6692 
6693   // Adjust the stack pointer for the new arguments...
6694   // These operations are automatically eliminated by the prolog/epilog pass
6695   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6696   SDValue CallSeqStart = Chain;
6697 
  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
6700   SDValue LROp, FPOp;
6701   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6702 
6703   // Set up a copy of the stack pointer for use loading and storing any
6704   // arguments that may not fit in the registers available for argument
6705   // passing.
6706   SDValue StackPtr;
6707   if (isPPC64)
6708     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6709   else
6710     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6711 
6712   // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point arguments
6714   // must be stored to our stack, and loaded into integer regs as well, if
6715   // any integer regs are available for argument passing.
6716   unsigned ArgOffset = LinkageSize;
6717   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6718 
6719   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6720     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6721     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6722   };
6723   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6724     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6725     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6726   };
6727   static const MCPhysReg VR[] = {
6728     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6729     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6730   };
6731   const unsigned NumGPRs = array_lengthof(GPR_32);
6732   const unsigned NumFPRs = 13;
6733   const unsigned NumVRs  = array_lengthof(VR);
6734 
6735   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6736 
6737   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6738   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6739 
6740   SmallVector<SDValue, 8> MemOpChains;
6741   for (unsigned i = 0; i != NumOps; ++i) {
6742     SDValue Arg = OutVals[i];
6743     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6744 
6745     // PtrOff will be used to store the current argument to the stack if a
6746     // register cannot be found for it.
6747     SDValue PtrOff;
6748 
6749     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6750 
6751     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6752 
6753     // On PPC64, promote integers to 64-bit values.
6754     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6755       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6756       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6757       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6758     }
6759 
6760     // FIXME memcpy is used way more than necessary.  Correctness first.
6761     // Note: "by value" is code for passing a structure by value, not
6762     // basic types.
6763     if (Flags.isByVal()) {
6764       unsigned Size = Flags.getByValSize();
6765       // Very small objects are passed right-justified.  Everything else is
6766       // passed left-justified.
6767       if (Size==1 || Size==2) {
6768         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6769         if (GPR_idx != NumGPRs) {
6770           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6771                                         MachinePointerInfo(), VT);
6772           MemOpChains.push_back(Load.getValue(1));
6773           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6774 
6775           ArgOffset += PtrByteSize;
6776         } else {
6777           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6778                                           PtrOff.getValueType());
6779           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6780           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6781                                                             CallSeqStart,
6782                                                             Flags, DAG, dl);
6783           ArgOffset += PtrByteSize;
6784         }
6785         continue;
6786       }
6787       // Copy entire object into memory.  There are cases where gcc-generated
6788       // code assumes it is there, even if it could be put entirely into
6789       // registers.  (This is not what the doc says.)
6790       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6791                                                         CallSeqStart,
6792                                                         Flags, DAG, dl);
6793 
6794       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6795       // copy the pieces of the object that fit into registers from the
6796       // parameter save area.
6797       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6798         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6799         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6800         if (GPR_idx != NumGPRs) {
6801           SDValue Load =
6802               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6803           MemOpChains.push_back(Load.getValue(1));
6804           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6805           ArgOffset += PtrByteSize;
6806         } else {
6807           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6808           break;
6809         }
6810       }
6811       continue;
6812     }
6813 
6814     switch (Arg.getSimpleValueType().SimpleTy) {
6815     default: llvm_unreachable("Unexpected ValueType for argument!");
6816     case MVT::i1:
6817     case MVT::i32:
6818     case MVT::i64:
6819       if (GPR_idx != NumGPRs) {
6820         if (Arg.getValueType() == MVT::i1)
6821           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6822 
6823         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6824       } else {
6825         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6826                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6827                          TailCallArguments, dl);
6828       }
6829       ArgOffset += PtrByteSize;
6830       break;
6831     case MVT::f32:
6832     case MVT::f64:
6833       if (FPR_idx != NumFPRs) {
6834         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6835 
6836         if (CFlags.IsVarArg) {
6837           SDValue Store =
6838               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6839           MemOpChains.push_back(Store);
6840 
6841           // Float varargs are always shadowed in available integer registers
6842           if (GPR_idx != NumGPRs) {
6843             SDValue Load =
6844                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6845             MemOpChains.push_back(Load.getValue(1));
6846             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6847           }
6848           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6849             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6850             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6851             SDValue Load =
6852                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6853             MemOpChains.push_back(Load.getValue(1));
6854             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6855           }
6856         } else {
6857           // If we have any FPRs remaining, we may also have GPRs remaining.
6858           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6859           // GPRs.
6860           if (GPR_idx != NumGPRs)
6861             ++GPR_idx;
6862           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6863               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6864             ++GPR_idx;
6865         }
6866       } else
6867         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6868                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6869                          TailCallArguments, dl);
6870       if (isPPC64)
6871         ArgOffset += 8;
6872       else
6873         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6874       break;
6875     case MVT::v4f32:
6876     case MVT::v4i32:
6877     case MVT::v8i16:
6878     case MVT::v16i8:
6879       if (CFlags.IsVarArg) {
6880         // These go aligned on the stack, or in the corresponding R registers
6881         // when within range.  The Darwin PPC ABI doc claims they also go in
6882         // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ellipsis (...).  We do it
        // for all arguments; this seems to work.
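        // Align the offset to a 16-byte boundary, consuming a shadow GPR for
        // each padding word skipped.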
        while (ArgOffset % 16 != 0) {
6886           ArgOffset += PtrByteSize;
6887           if (GPR_idx != NumGPRs)
6888             GPR_idx++;
6889         }
6890         // We could elide this store in the case where the object fits
6891         // entirely in R registers.  Maybe later.
6892         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6893                              DAG.getConstant(ArgOffset, dl, PtrVT));
6894         SDValue Store =
6895             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6896         MemOpChains.push_back(Store);
6897         if (VR_idx != NumVRs) {
6898           SDValue Load =
6899               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6900           MemOpChains.push_back(Load.getValue(1));
6901           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6902         }
6903         ArgOffset += 16;
6904         for (unsigned i=0; i<16; i+=PtrByteSize) {
6905           if (GPR_idx == NumGPRs)
6906             break;
6907           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6908                                    DAG.getConstant(i, dl, PtrVT));
6909           SDValue Load =
6910               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6911           MemOpChains.push_back(Load.getValue(1));
6912           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6913         }
6914         break;
6915       }
6916 
6917       // Non-varargs Altivec params generally go in registers, but have
6918       // stack space allocated at the end.
6919       if (VR_idx != NumVRs) {
6920         // Doesn't have GPR space allocated.
6921         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6922       } else if (nAltivecParamsAtEnd==0) {
6923         // We are emitting Altivec params in order.
6924         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6925                          isPPC64, CFlags.IsTailCall, true, MemOpChains,
6926                          TailCallArguments, dl);
6927         ArgOffset += 16;
6928       }
6929       break;
6930     }
6931   }
6932   // If all Altivec parameters fit in registers, as they usually do,
6933   // they get stack space following the non-Altivec parameters.  We
6934   // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers, emit
6936   // the stores here.
6937   if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6938     unsigned j = 0;
6939     // Offset is aligned; skip 1st 12 params which go in V registers.
6940     ArgOffset = ((ArgOffset+15)/16)*16;
6941     ArgOffset += 12*16;
6942     for (unsigned i = 0; i != NumOps; ++i) {
6943       SDValue Arg = OutVals[i];
6944       EVT ArgType = Outs[i].VT;
6945       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6946           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6947         if (++j > NumVRs) {
6948           SDValue PtrOff;
6949           // We are emitting Altivec params in order.
6950           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6951                            isPPC64, CFlags.IsTailCall, true, MemOpChains,
6952                            TailCallArguments, dl);
6953           ArgOffset += 16;
6954         }
6955       }
6956     }
6957   }
6958 
6959   if (!MemOpChains.empty())
6960     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6961 
6962   // On Darwin, R12 must contain the address of an indirect callee.  This does
6963   // not mean the MTCTR instruction must use R12; it's easier to model this as
6964   // an extra parameter, so do that.
6965   if (CFlags.IsIndirect) {
6966     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6967     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6968                                                    PPC::R12), Callee));
6969   }
6970 
6971   // Build a sequence of copy-to-reg nodes chained together with token chain
6972   // and flag operands which copy the outgoing args into the appropriate regs.
6973   SDValue InFlag;
6974   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6975     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6976                              RegsToPass[i].second, InFlag);
6977     InFlag = Chain.getValue(1);
6978   }
6979 
6980   if (CFlags.IsTailCall)
6981     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6982                     TailCallArguments);
6983 
6984   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6985                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6986 }
6987 
6988 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6989                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6990                    CCState &State) {
6991 
6992   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6993       State.getMachineFunction().getSubtarget());
6994   const bool IsPPC64 = Subtarget.isPPC64();
6995   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6996   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6997 
6998   if (ValVT.isVector() && !State.getMachineFunction()
6999                                .getTarget()
7000                                .Options.EnableAIXExtendedAltivecABI)
7001     report_fatal_error("the default Altivec AIX ABI is not yet supported");
7002 
7003   if (ValVT == MVT::f128)
7004     report_fatal_error("f128 is unimplemented on AIX.");
7005 
7006   if (ArgFlags.isNest())
7007     report_fatal_error("Nest arguments are unimplemented.");
7008 
7009   static const MCPhysReg GPR_32[] = {// 32-bit registers.
7010                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7011                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7012   static const MCPhysReg GPR_64[] = {// 64-bit registers.
7013                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7014                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7015 
7016   static const MCPhysReg VR[] = {// Vector registers.
7017                                  PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
7018                                  PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
7019                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
7020 
7021   if (ArgFlags.isByVal()) {
7022     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
7023       report_fatal_error("Pass-by-value arguments with alignment greater than "
7024                          "register width are not supported.");
7025 
7026     const unsigned ByValSize = ArgFlags.getByValSize();
7027 
7028     // An empty aggregate parameter takes up no storage and no registers,
7029     // but needs a MemLoc for a stack slot for the formal arguments side.
7030     if (ByValSize == 0) {
7031       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7032                                        State.getNextStackOffset(), RegVT,
7033                                        LocInfo));
7034       return false;
7035     }
7036 
7037     const unsigned StackSize = alignTo(ByValSize, PtrAlign);
7038     unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
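    // Shadow the byval in GPRs one pointer-sized piece at a time; once the
    // GPRs are exhausted, a single MemLoc marks where the remaining bytes
    // live in the parameter save area.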
7039     for (const unsigned E = Offset + StackSize; Offset < E;
7040          Offset += PtrAlign.value()) {
7041       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7042         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7043       else {
7044         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7045                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
7046                                          LocInfo));
7047         break;
7048       }
7049     }
7050     return false;
7051   }
7052 
7053   // Arguments always reserve parameter save area.
7054   switch (ValVT.SimpleTy) {
7055   default:
7056     report_fatal_error("Unhandled value type for argument.");
7057   case MVT::i64:
7058     // i64 arguments should have been split to i32 for PPC32.
7059     assert(IsPPC64 && "PPC32 should have split i64 values.");
7060     LLVM_FALLTHROUGH;
7061   case MVT::i1:
7062   case MVT::i32: {
7063     const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
7064     // AIX integer arguments are always passed in register width.
7065     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
7066       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
7067                                   : CCValAssign::LocInfo::ZExt;
7068     if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7069       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7070     else
7071       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
7072 
7073     return false;
7074   }
7075   case MVT::f32:
7076   case MVT::f64: {
    // The parameter save area (PSA) is reserved even if the float is passed
    // in an FPR.
7078     const unsigned StoreSize = LocVT.getStoreSize();
7079     // Floats are always 4-byte aligned in the PSA on AIX.
7080     // This includes f64 in 64-bit mode for ABI compatibility.
7081     const unsigned Offset =
7082         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7083     unsigned FReg = State.AllocateReg(FPR);
7084     if (FReg)
7085       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7086 
7087     // Reserve and initialize GPRs or initialize the PSA as required.
7088     for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
7089       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
7090         assert(FReg && "An FPR should be available when a GPR is reserved.");
7091         if (State.isVarArg()) {
7092           // Successfully reserved GPRs are only initialized for vararg calls.
7093           // Custom handling is required for:
7094           //   f64 in PPC32 needs to be split into 2 GPRs.
7095           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7096           State.addLoc(
7097               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7098         }
7099       } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // For compatibility with the AIX XL compiler, initialization occurs
        // even if an FPR was already initialized. The full memory for the
        // argument is initialized even if a prior word is saved in a GPR.
        // A custom MemLoc is used when the argument is also passed in an FPR
        // so that the callee handling can skip over it easily.
7106         State.addLoc(
7107             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7108                                              LocInfo)
7109                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7110         break;
7111       }
7112     }
7113 
7114     return false;
7115   }
7116   case MVT::v4f32:
7117   case MVT::v4i32:
7118   case MVT::v8i16:
7119   case MVT::v16i8:
7120   case MVT::v2i64:
7121   case MVT::v2f64:
7122   case MVT::v1i128: {
7123     if (State.isVarArg())
7124       report_fatal_error(
7125           "variadic arguments for vector types are unimplemented for AIX");
7126 
7127     if (unsigned VReg = State.AllocateReg(VR))
7128       State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7129     else {
7130       report_fatal_error(
7131           "passing vector parameters to the stack is unimplemented for AIX");
7132     }
7133     return false;
7134   }
7135   }
7136   return true;
7137 }
7138 
7139 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7140                                                     bool IsPPC64) {
7141   assert((IsPPC64 || SVT != MVT::i64) &&
7142          "i64 should have been split for 32-bit codegen.");
7143 
7144   switch (SVT) {
7145   default:
7146     report_fatal_error("Unexpected value type for formal argument");
7147   case MVT::i1:
7148   case MVT::i32:
7149   case MVT::i64:
7150     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7151   case MVT::f32:
7152     return &PPC::F4RCRegClass;
7153   case MVT::f64:
7154     return &PPC::F8RCRegClass;
7155   case MVT::v4f32:
7156   case MVT::v4i32:
7157   case MVT::v8i16:
7158   case MVT::v16i8:
7159   case MVT::v2i64:
7160   case MVT::v2f64:
7161   case MVT::v1i128:
7162     return &PPC::VRRCRegClass;
7163   }
7164 }
7165 
7166 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7167                                         SelectionDAG &DAG, SDValue ArgValue,
7168                                         MVT LocVT, const SDLoc &dl) {
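  // The value arrived in a full-width register (LocVT); attach AssertSext or
  // AssertZext so the extension the caller performed is not lost, then
  // truncate the value back down to ValVT.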
7169   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7170   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7171 
7172   if (Flags.isSExt())
7173     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7174                            DAG.getValueType(ValVT));
7175   else if (Flags.isZExt())
7176     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7177                            DAG.getValueType(ValVT));
7178 
7179   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7180 }
7181 
7182 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7183   const unsigned LASize = FL->getLinkageSize();
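  // The parameter save area begins immediately after the linkage area, and
  // each argument register shadows one pointer-sized slot; e.g. assuming the
  // 64-bit linkage area of 48 bytes, X5 maps to 48 + 8 * (5 - 3) = 64.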
7184 
7185   if (PPC::GPRCRegClass.contains(Reg)) {
7186     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7187            "Reg must be a valid argument register!");
7188     return LASize + 4 * (Reg - PPC::R3);
7189   }
7190 
7191   if (PPC::G8RCRegClass.contains(Reg)) {
7192     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7193            "Reg must be a valid argument register!");
7194     return LASize + 8 * (Reg - PPC::X3);
7195   }
7196 
7197   llvm_unreachable("Only general purpose registers expected.");
7198 }
7199 
7200 //   AIX ABI Stack Frame Layout:
7201 //
7202 //   Low Memory +--------------------------------------------+
7203 //   SP   +---> | Back chain                                 | ---+
7204 //        |     +--------------------------------------------+    |
7205 //        |     | Saved Condition Register                   |    |
7206 //        |     +--------------------------------------------+    |
7207 //        |     | Saved Linkage Register                     |    |
7208 //        |     +--------------------------------------------+    | Linkage Area
7209 //        |     | Reserved for compilers                     |    |
7210 //        |     +--------------------------------------------+    |
7211 //        |     | Reserved for binders                       |    |
7212 //        |     +--------------------------------------------+    |
7213 //        |     | Saved TOC pointer                          | ---+
7214 //        |     +--------------------------------------------+
7215 //        |     | Parameter save area                        |
7216 //        |     +--------------------------------------------+
7217 //        |     | Alloca space                               |
7218 //        |     +--------------------------------------------+
7219 //        |     | Local variable space                       |
7220 //        |     +--------------------------------------------+
7221 //        |     | Float/int conversion temporary             |
7222 //        |     +--------------------------------------------+
7223 //        |     | Save area for AltiVec registers            |
7224 //        |     +--------------------------------------------+
7225 //        |     | AltiVec alignment padding                  |
7226 //        |     +--------------------------------------------+
7227 //        |     | Save area for VRSAVE register              |
7228 //        |     +--------------------------------------------+
7229 //        |     | Save area for General Purpose registers    |
7230 //        |     +--------------------------------------------+
7231 //        |     | Save area for Floating Point registers     |
7232 //        |     +--------------------------------------------+
7233 //        +---- | Back chain                                 |
7234 // High Memory  +--------------------------------------------+
7235 //
7236 //  Specifications:
7237 //  AIX 7.2 Assembler Language Reference
7238 //  Subroutine linkage convention
7239 
7240 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7241     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7242     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7243     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7244 
7245   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7246           CallConv == CallingConv::Fast) &&
7247          "Unexpected calling convention!");
7248 
7249   if (getTargetMachine().Options.GuaranteedTailCallOpt)
7250     report_fatal_error("Tail call support is unimplemented on AIX.");
7251 
7252   if (useSoftFloat())
7253     report_fatal_error("Soft float support is unimplemented on AIX.");
7254 
7255   const PPCSubtarget &Subtarget =
7256       static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7257 
7258   const bool IsPPC64 = Subtarget.isPPC64();
7259   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7260 
7261   // Assign locations to all of the incoming arguments.
7262   SmallVector<CCValAssign, 16> ArgLocs;
7263   MachineFunction &MF = DAG.getMachineFunction();
7264   MachineFrameInfo &MFI = MF.getFrameInfo();
7265   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7266 
7267   const EVT PtrVT = getPointerTy(MF.getDataLayout());
7268   // Reserve space for the linkage area on the stack.
7269   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7270   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7271   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7272 
7273   SmallVector<SDValue, 8> MemOps;
7274 
7275   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7276     CCValAssign &VA = ArgLocs[I++];
7277     MVT LocVT = VA.getLocVT();
7278     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7279     if (VA.isMemLoc() && VA.getValVT().isVector())
7280       report_fatal_error(
7281           "passing vector parameters to the stack is unimplemented for AIX");
7282 
7283     // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in a register.  The caller is required to initialize both the register
    // and memory; however, the callee can choose to expect it in either.
7287     // The memloc is dismissed here because the argument is retrieved from
7288     // the register.
7289     if (VA.isMemLoc() && VA.needsCustom())
7290       continue;
7291 
7292     if (Flags.isByVal() && VA.isMemLoc()) {
7293       const unsigned Size =
7294           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7295                   PtrByteSize);
7296       const int FI = MF.getFrameInfo().CreateFixedObject(
7297           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7298           /* IsAliased */ true);
7299       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7300       InVals.push_back(FIN);
7301 
7302       continue;
7303     }
7304 
7305     if (Flags.isByVal()) {
7306       assert(VA.isRegLoc() && "MemLocs should already be handled.");
7307 
7308       const MCPhysReg ArgReg = VA.getLocReg();
7309       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7310 
7311       if (Flags.getNonZeroByValAlign() > PtrByteSize)
7312         report_fatal_error("Over aligned byvals not supported yet.");
7313 
7314       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7315       const int FI = MF.getFrameInfo().CreateFixedObject(
7316           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7317           /* IsAliased */ true);
7318       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7319       InVals.push_back(FIN);
7320 
7321       // Add live ins for all the RegLocs for the same ByVal.
7322       const TargetRegisterClass *RegClass =
7323           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7324 
7325       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7326                                                unsigned Offset) {
7327         const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7328         // Since the callers side has left justified the aggregate in the
7329         // register, we can simply store the entire register into the stack
7330         // slot.
7331         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixed-stack object is needed because accessing a
        // field of the ByVal will use a GEP and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the argument's address is not taken, but that will need
        // to be future work.
7337         SDValue Store = DAG.getStore(
7338             CopyFrom.getValue(1), dl, CopyFrom,
7339             DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7340             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7341 
7342         MemOps.push_back(Store);
7343       };
7344 
7345       unsigned Offset = 0;
7346       HandleRegLoc(VA.getLocReg(), Offset);
7347       Offset += PtrByteSize;
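      // Any further RegLocs belong to this same byval; store each register
      // into the next pointer-sized slice of the stack object.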
7348       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7349            Offset += PtrByteSize) {
7350         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7351                "RegLocs should be for ByVal argument.");
7352 
7353         const CCValAssign RL = ArgLocs[I++];
7354         HandleRegLoc(RL.getLocReg(), Offset);
7355       }
7356 
7357       if (Offset != StackSize) {
7358         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7359                "Expected MemLoc for remaining bytes.");
7360         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
7362         // more needs to be done.
7363         ++I;
7364       }
7365 
7366       continue;
7367     }
7368 
7369     EVT ValVT = VA.getValVT();
7370     if (VA.isRegLoc() && !VA.needsCustom()) {
7371       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7372       unsigned VReg =
7373           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7374       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7375       if (ValVT.isScalarInteger() &&
7376           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7377         ArgValue =
7378             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7379       }
7380       InVals.push_back(ArgValue);
7381       continue;
7382     }
7383     if (VA.isMemLoc()) {
7384       const unsigned LocSize = LocVT.getStoreSize();
7385       const unsigned ValSize = ValVT.getStoreSize();
7386       assert((ValSize <= LocSize) &&
7387              "Object size is larger than size of MemLoc");
7388       int CurArgOffset = VA.getLocMemOffset();
7389       // Objects are right-justified because AIX is big-endian.
7390       if (LocSize > ValSize)
7391         CurArgOffset += LocSize - ValSize;
7392       // Potential tail calls could cause overwriting of argument stack slots.
7393       const bool IsImmutable =
7394           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7395             (CallConv == CallingConv::Fast));
7396       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7397       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7398       SDValue ArgValue =
7399           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7400       InVals.push_back(ArgValue);
7401       continue;
7402     }
7403   }
7404 
  // On AIX, a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // This is the area that is at least reserved in the caller of this function.
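  // For example (a sketch): on 32-bit AIX the linkage area is 24 bytes and the
  // minimum parameter save area is 8 x 4 = 32 bytes, so at least 56 bytes are
  // assumed to be reserved in the caller.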
7408   unsigned CallerReservedArea =
7409       std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7410 
  // Set the size that is at least reserved in the caller of this function. A
  // tail-call-optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
7415   CallerReservedArea =
7416       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7417   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7418   FuncInfo->setMinReservedArea(CallerReservedArea);
7419 
7420   if (isVarArg) {
7421     FuncInfo->setVarArgsFrameIndex(
7422         MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7423     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7424 
7425     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7426                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7427 
7428     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7429                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7430     const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7431 
    // Any GPR argument registers not already used by named arguments are
    // spilled to consecutive slots starting at the VarArgsFrameIndex so that
    // they may be loaded by dereferencing the result of va_next.
7435     for (unsigned GPRIndex =
7436              (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7437          GPRIndex < NumGPArgRegs; ++GPRIndex) {
7438 
7439       const unsigned VReg =
7440           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7441                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7442 
7443       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7444       SDValue Store =
7445           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7446       MemOps.push_back(Store);
7447       // Increment the address for the next argument to store.
7448       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7449       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7450     }
7451   }
7452 
7453   if (!MemOps.empty())
7454     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7455 
7456   return Chain;
7457 }
7458 
7459 SDValue PPCTargetLowering::LowerCall_AIX(
7460     SDValue Chain, SDValue Callee, CallFlags CFlags,
7461     const SmallVectorImpl<ISD::OutputArg> &Outs,
7462     const SmallVectorImpl<SDValue> &OutVals,
7463     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7464     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7465     const CallBase *CB) const {
7466   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7467   // AIX ABI stack frame layout.
7468 
7469   assert((CFlags.CallConv == CallingConv::C ||
7470           CFlags.CallConv == CallingConv::Cold ||
7471           CFlags.CallConv == CallingConv::Fast) &&
7472          "Unexpected calling convention!");
7473 
7474   if (CFlags.IsPatchPoint)
7475     report_fatal_error("This call type is unimplemented on AIX.");
7476 
7477   const PPCSubtarget& Subtarget =
7478       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7479 
7480   MachineFunction &MF = DAG.getMachineFunction();
7481   SmallVector<CCValAssign, 16> ArgLocs;
7482   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7483                  *DAG.getContext());
7484 
7485   // Reserve space for the linkage save area (LSA) on the stack.
7486   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7487   //   [SP][CR][LR][2 x reserved][TOC].
7488   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7489   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7490   const bool IsPPC64 = Subtarget.isPPC64();
7491   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7492   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7493   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7494   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7495 
7496   // The prolog code of the callee may store up to 8 GPR argument registers to
7497   // the stack, allowing va_start to index over them in memory if the callee
7498   // is variadic.
7499   // Because we cannot tell if this is needed on the caller side, we have to
7500   // conservatively assume that it is needed.  As such, make sure we have at
7501   // least enough stack space for the caller to store the 8 GPRs.
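  // For example (a sketch): on 64-bit AIX the linkage area is 48 bytes and the
  // minimum parameter save area is 8 x 8 = 64 bytes, so NumBytes below is at
  // least 112 even for calls that pass all their arguments in registers.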
7502   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7503   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7504                                      CCInfo.getNextStackOffset());
7505 
7506   // Adjust the stack pointer for the new arguments...
7507   // These operations are automatically eliminated by the prolog/epilog pass.
7508   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7509   SDValue CallSeqStart = Chain;
7510 
7511   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7512   SmallVector<SDValue, 8> MemOpChains;
7513 
7514   // Set up a copy of the stack pointer for loading and storing any
7515   // arguments that may not fit in the registers available for argument
7516   // passing.
7517   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7518                                    : DAG.getRegister(PPC::R1, MVT::i32);
7519 
7520   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7521     const unsigned ValNo = ArgLocs[I].getValNo();
7522     SDValue Arg = OutVals[ValNo];
7523     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7524 
7525     if (Flags.isByVal()) {
7526       const unsigned ByValSize = Flags.getByValSize();
7527 
7528       // Nothing to do for zero-sized ByVals on the caller side.
7529       if (!ByValSize) {
7530         ++I;
7531         continue;
7532       }
7533 
7534       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7535         return DAG.getExtLoad(
7536             ISD::ZEXTLOAD, dl, PtrVT, Chain,
7537             (LoadOffset != 0)
7538                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7539                 : Arg,
7540             MachinePointerInfo(), VT);
7541       };
7542 
7543       unsigned LoadOffset = 0;
7544 
      // Initialize the registers that are fully occupied by the by-val
      // argument.
7546       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7547         SDValue Load = GetLoad(PtrVT, LoadOffset);
7548         MemOpChains.push_back(Load.getValue(1));
7549         LoadOffset += PtrByteSize;
7550         const CCValAssign &ByValVA = ArgLocs[I++];
7551         assert(ByValVA.getValNo() == ValNo &&
7552                "Unexpected location for pass-by-value argument.");
7553         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7554       }
7555 
7556       if (LoadOffset == ByValSize)
7557         continue;
7558 
7559       // There must be one more loc to handle the remainder.
7560       assert(ArgLocs[I].getValNo() == ValNo &&
7561              "Expected additional location for by-value argument.");
7562 
7563       if (ArgLocs[I].isMemLoc()) {
7564         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7565         const CCValAssign &ByValVA = ArgLocs[I++];
7566         ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that are not passed in registers.
7568         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7569         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7570             (LoadOffset != 0)
7571                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7572                 : Arg,
7573             DAG.getObjectPtrOffset(dl, StackPtr,
7574                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
7575             CallSeqStart, MemcpyFlags, DAG, dl);
7576         continue;
7577       }
7578 
7579       // Initialize the final register residue.
7580       // Any residue that occupies the final by-val arg register must be
7581       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7582       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7583       // 2 and 1 byte loads.
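      // Continuing the 7-byte example on 64-bit AIX (a sketch): the 4-, 2- and
      // 1-byte loads are shifted left by 32, 16 and 8 bits respectively and
      // OR'ed together, so the 7 bytes end up left-justified in bits 63:8 of
      // the GPR with the low byte zero.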
7584       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7585       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7586              "Unexpected register residue for by-value argument.");
7587       SDValue ResidueVal;
7588       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7589         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7590         const MVT VT =
7591             N == 1 ? MVT::i8
7592                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7593         SDValue Load = GetLoad(VT, LoadOffset);
7594         MemOpChains.push_back(Load.getValue(1));
7595         LoadOffset += N;
7596         Bytes += N;
7597 
        // By-val arguments are passed left-justified in registers. Every load
        // here needs to be shifted; otherwise, a full register load would have
        // been used.
7601         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7602                "Unexpected load emitted during handling of pass-by-value "
7603                "argument.");
7604         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7605         EVT ShiftAmountTy =
7606             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7607         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7608         SDValue ShiftedLoad =
7609             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7610         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7611                                               ShiftedLoad)
7612                                 : ShiftedLoad;
7613       }
7614 
7615       const CCValAssign &ByValVA = ArgLocs[I++];
7616       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7617       continue;
7618     }
7619 
7620     CCValAssign &VA = ArgLocs[I++];
7621     const MVT LocVT = VA.getLocVT();
7622     const MVT ValVT = VA.getValVT();
7623 
7624     if (VA.isMemLoc() && VA.getValVT().isVector())
7625       report_fatal_error(
7626           "passing vector parameters to the stack is unimplemented for AIX");
7627 
7628     switch (VA.getLocInfo()) {
7629     default:
7630       report_fatal_error("Unexpected argument extension type.");
7631     case CCValAssign::Full:
7632       break;
7633     case CCValAssign::ZExt:
7634       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7635       break;
7636     case CCValAssign::SExt:
7637       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7638       break;
7639     }
7640 
7641     if (VA.isRegLoc() && !VA.needsCustom()) {
7642       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7643       continue;
7644     }
7645 
7646     if (VA.isMemLoc()) {
7647       SDValue PtrOff =
7648           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7649       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7650       MemOpChains.push_back(
7651           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7652 
7653       continue;
7654     }
7655 
7656     // Custom handling is used for GPR initializations for vararg float
7657     // arguments.
7658     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7659            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7660            "Unexpected register handling for calling convention.");
7661 
7662     SDValue ArgAsInt =
7663         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7664 
7665     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7666       // f32 in 32-bit GPR
7667       // f64 in 64-bit GPR
7668       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7669     else if (Arg.getValueType().getFixedSizeInBits() <
7670              LocVT.getFixedSizeInBits())
7671       // f32 in 64-bit GPR.
7672       RegsToPass.push_back(std::make_pair(
7673           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7674     else {
7675       // f64 in two 32-bit GPRs
7676       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7677       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7678              "Unexpected custom register for argument!");
7679       CCValAssign &GPR1 = VA;
7680       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7681                                      DAG.getConstant(32, dl, MVT::i8));
7682       RegsToPass.push_back(std::make_pair(
7683           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7684 
7685       if (I != E) {
        // If only one GPR was available, there will be only one custom GPR
        // location and the rest of the argument will be passed in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == ValNo) {
7690           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7691           CCValAssign &GPR2 = ArgLocs[I++];
7692           RegsToPass.push_back(std::make_pair(
7693               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7694         }
7695       }
7696     }
7697   }
7698 
7699   if (!MemOpChains.empty())
7700     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7701 
7702   // For indirect calls, we need to save the TOC base to the stack for
7703   // restoration after the call.
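  // The save slot lives in the linkage area of the caller's frame (the sixth
  // word of the LSA described in the comment above, i.e. offset 40 on 64-bit
  // AIX and offset 20 on 32-bit AIX).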
7704   if (CFlags.IsIndirect) {
7705     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7706     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7707     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7708     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7709     const unsigned TOCSaveOffset =
7710         Subtarget.getFrameLowering()->getTOCSaveOffset();
7711 
7712     setUsesTOCBasePtr(DAG);
7713     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7714     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7715     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7716     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7717     Chain = DAG.getStore(
7718         Val.getValue(1), dl, Val, AddPtr,
7719         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7720   }
7721 
7722   // Build a sequence of copy-to-reg nodes chained together with token chain
7723   // and flag operands which copy the outgoing args into the appropriate regs.
7724   SDValue InFlag;
7725   for (auto Reg : RegsToPass) {
7726     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7727     InFlag = Chain.getValue(1);
7728   }
7729 
7730   const int SPDiff = 0;
7731   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7732                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7733 }
7734 
7735 bool
7736 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7737                                   MachineFunction &MF, bool isVarArg,
7738                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7739                                   LLVMContext &Context) const {
7740   SmallVector<CCValAssign, 16> RVLocs;
7741   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7742   return CCInfo.CheckReturn(
7743       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7744                 ? RetCC_PPC_Cold
7745                 : RetCC_PPC);
7746 }
7747 
7748 SDValue
7749 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7750                                bool isVarArg,
7751                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7752                                const SmallVectorImpl<SDValue> &OutVals,
7753                                const SDLoc &dl, SelectionDAG &DAG) const {
7754   SmallVector<CCValAssign, 16> RVLocs;
7755   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7756                  *DAG.getContext());
7757   CCInfo.AnalyzeReturn(Outs,
7758                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7759                            ? RetCC_PPC_Cold
7760                            : RetCC_PPC);
7761 
7762   SDValue Flag;
7763   SmallVector<SDValue, 4> RetOps(1, Chain);
7764 
7765   // Copy the result values into the output registers.
7766   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7767     CCValAssign &VA = RVLocs[i];
7768     assert(VA.isRegLoc() && "Can only return in registers!");
7769 
7770     SDValue Arg = OutVals[RealResIdx];
7771 
7772     switch (VA.getLocInfo()) {
7773     default: llvm_unreachable("Unknown loc info!");
7774     case CCValAssign::Full: break;
7775     case CCValAssign::AExt:
7776       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7777       break;
7778     case CCValAssign::ZExt:
7779       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7780       break;
7781     case CCValAssign::SExt:
7782       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7783       break;
7784     }
7785     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7786       bool isLittleEndian = Subtarget.isLittleEndian();
7787       // Legalize ret f64 -> ret 2 x i32.
7788       SDValue SVal =
7789           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7790                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7791       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7792       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7793       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7794                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7795       Flag = Chain.getValue(1);
7796       VA = RVLocs[++i]; // skip ahead to next loc
7797       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7798     } else
7799       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7800     Flag = Chain.getValue(1);
7801     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7802   }
7803 
7804   RetOps[0] = Chain;  // Update chain.
7805 
7806   // Add the flag if we have it.
7807   if (Flag.getNode())
7808     RetOps.push_back(Flag);
7809 
7810   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7811 }
7812 
7813 SDValue
7814 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7815                                                 SelectionDAG &DAG) const {
7816   SDLoc dl(Op);
7817 
7818   // Get the correct type for integers.
7819   EVT IntVT = Op.getValueType();
7820 
7821   // Get the inputs.
7822   SDValue Chain = Op.getOperand(0);
7823   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7824   // Build a DYNAREAOFFSET node.
7825   SDValue Ops[2] = {Chain, FPSIdx};
7826   SDVTList VTs = DAG.getVTList(IntVT);
7827   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7828 }
7829 
7830 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7831                                              SelectionDAG &DAG) const {
7832   // When we pop the dynamic allocation we need to restore the SP link.
7833   SDLoc dl(Op);
7834 
7835   // Get the correct type for pointers.
7836   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7837 
7838   // Construct the stack pointer operand.
7839   bool isPPC64 = Subtarget.isPPC64();
7840   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7841   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7842 
7843   // Get the operands for the STACKRESTORE.
7844   SDValue Chain = Op.getOperand(0);
7845   SDValue SaveSP = Op.getOperand(1);
7846 
7847   // Load the old link SP.
7848   SDValue LoadLinkSP =
7849       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7850 
7851   // Restore the stack pointer.
7852   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7853 
7854   // Store the old link SP.
7855   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7856 }
7857 
7858 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7859   MachineFunction &MF = DAG.getMachineFunction();
7860   bool isPPC64 = Subtarget.isPPC64();
7861   EVT PtrVT = getPointerTy(MF.getDataLayout());
7862 
  // Get the current return address save index.
7865   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7866   int RASI = FI->getReturnAddrSaveIndex();
7867 
  // If the return address save index hasn't been defined yet.
7869   if (!RASI) {
    // Find out the fixed offset of the return address save area.
7871     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
7873     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7874     // Save the result.
7875     FI->setReturnAddrSaveIndex(RASI);
7876   }
7877   return DAG.getFrameIndex(RASI, PtrVT);
7878 }
7879 
7880 SDValue
7881 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7882   MachineFunction &MF = DAG.getMachineFunction();
7883   bool isPPC64 = Subtarget.isPPC64();
7884   EVT PtrVT = getPointerTy(MF.getDataLayout());
7885 
7886   // Get current frame pointer save index.  The users of this index will be
7887   // primarily DYNALLOC instructions.
7888   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7889   int FPSI = FI->getFramePointerSaveIndex();
7890 
7891   // If the frame pointer save index hasn't been defined yet.
7892   if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
7896     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7897     // Save the result.
7898     FI->setFramePointerSaveIndex(FPSI);
7899   }
7900   return DAG.getFrameIndex(FPSI, PtrVT);
7901 }
7902 
7903 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7904                                                    SelectionDAG &DAG) const {
7905   MachineFunction &MF = DAG.getMachineFunction();
7906   // Get the inputs.
7907   SDValue Chain = Op.getOperand(0);
7908   SDValue Size  = Op.getOperand(1);
7909   SDLoc dl(Op);
7910 
7911   // Get the correct type for pointers.
7912   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7913   // Negate the size.
7914   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7915                                 DAG.getConstant(0, dl, PtrVT), Size);
7916   // Construct a node for the frame pointer save index.
7917   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7918   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7919   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7920   if (hasInlineStackProbe(MF))
7921     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7922   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7923 }
7924 
7925 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
7927   MachineFunction &MF = DAG.getMachineFunction();
7928 
7929   bool isPPC64 = Subtarget.isPPC64();
7930   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7931 
7932   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7933   return DAG.getFrameIndex(FI, PtrVT);
7934 }
7935 
7936 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7937                                                SelectionDAG &DAG) const {
7938   SDLoc DL(Op);
7939   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7940                      DAG.getVTList(MVT::i32, MVT::Other),
7941                      Op.getOperand(0), Op.getOperand(1));
7942 }
7943 
7944 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7945                                                 SelectionDAG &DAG) const {
7946   SDLoc DL(Op);
7947   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7948                      Op.getOperand(0), Op.getOperand(1));
7949 }
7950 
7951 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7952   if (Op.getValueType().isVector())
7953     return LowerVectorLoad(Op, DAG);
7954 
7955   assert(Op.getValueType() == MVT::i1 &&
7956          "Custom lowering only for i1 loads");
7957 
7958   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7959 
7960   SDLoc dl(Op);
7961   LoadSDNode *LD = cast<LoadSDNode>(Op);
7962 
7963   SDValue Chain = LD->getChain();
7964   SDValue BasePtr = LD->getBasePtr();
7965   MachineMemOperand *MMO = LD->getMemOperand();
7966 
7967   SDValue NewLD =
7968       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7969                      BasePtr, MVT::i8, MMO);
7970   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7971 
7972   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7973   return DAG.getMergeValues(Ops, dl);
7974 }
7975 
7976 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7977   if (Op.getOperand(1).getValueType().isVector())
7978     return LowerVectorStore(Op, DAG);
7979 
7980   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7981          "Custom lowering only for i1 stores");
7982 
7983   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7984 
7985   SDLoc dl(Op);
7986   StoreSDNode *ST = cast<StoreSDNode>(Op);
7987 
7988   SDValue Chain = ST->getChain();
7989   SDValue BasePtr = ST->getBasePtr();
7990   SDValue Value = ST->getValue();
7991   MachineMemOperand *MMO = ST->getMemOperand();
7992 
7993   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7994                       Value);
7995   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7996 }
7997 
7998 // FIXME: Remove this once the ANDI glue bug is fixed:
7999 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8000   assert(Op.getValueType() == MVT::i1 &&
8001          "Custom lowering only for i1 results");
8002 
8003   SDLoc DL(Op);
8004   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8005 }
8006 
8007 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8008                                                SelectionDAG &DAG) const {
8009 
8010   // Implements a vector truncate that fits in a vector register as a shuffle.
8011   // We want to legalize vector truncates down to where the source fits in
8012   // a vector register (and target is therefore smaller than vector register
8013   // size).  At that point legalization will try to custom lower the sub-legal
8014   // result and get here - where we can contain the truncate as a single target
8015   // operation.
8016 
8017   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8018   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8019   //
8020   // We will implement it for big-endian ordering as this (where x denotes
8021   // undefined):
8022   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8023   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8024   //
8025   // The same operation in little-endian ordering will be:
8026   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8027   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
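  //
  // For instance (a sketch), truncating v2i16 to v2i8 on a big-endian target
  // widens the source to v16i8 and shuffles it with shuffle mask <1, 3, ...>,
  // where the trailing lanes index into the second (undef) operand.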
8028 
8029   EVT TrgVT = Op.getValueType();
8030   assert(TrgVT.isVector() && "Vector type expected.");
8031   unsigned TrgNumElts = TrgVT.getVectorNumElements();
8032   EVT EltVT = TrgVT.getVectorElementType();
8033   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8034       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8035       !isPowerOf2_32(EltVT.getSizeInBits()))
8036     return SDValue();
8037 
8038   SDValue N1 = Op.getOperand(0);
8039   EVT SrcVT = N1.getValueType();
8040   unsigned SrcSize = SrcVT.getSizeInBits();
8041   if (SrcSize > 256 ||
8042       !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8043       !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
8044     return SDValue();
8045   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8046     return SDValue();
8047 
8048   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8049   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8050 
8051   SDLoc DL(Op);
8052   SDValue Op1, Op2;
8053   if (SrcSize == 256) {
8054     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8055     EVT SplitVT =
8056         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8057     unsigned SplitNumElts = SplitVT.getVectorNumElements();
8058     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8059                       DAG.getConstant(0, DL, VecIdxTy));
8060     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8061                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8062   }
8063   else {
8064     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8065     Op2 = DAG.getUNDEF(WideVT);
8066   }
8067 
8068   // First list the elements we want to keep.
8069   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8070   SmallVector<int, 16> ShuffV;
8071   if (Subtarget.isLittleEndian())
8072     for (unsigned i = 0; i < TrgNumElts; ++i)
8073       ShuffV.push_back(i * SizeMult);
8074   else
8075     for (unsigned i = 1; i <= TrgNumElts; ++i)
8076       ShuffV.push_back(i * SizeMult - 1);
8077 
8078   // Populate the remaining elements with undefs.
8079   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8081     ShuffV.push_back(WideNumElts + 1);
8082 
8083   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8084   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8085   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8086 }
8087 
/// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
/// when possible.
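///
/// fsel selects its second operand when its first operand is greater than or
/// equal to zero, so (a sketch of the common case) (select_cc lhs, rhs, tv,
/// fv, setge) can be emitted as (fsel (fsub lhs, rhs), tv, fv) when the
/// finite-math assumptions below allow it; the code also handles the other
/// comparison codes and the zero-RHS special case.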
8090 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8091   // Not FP, or using SPE? Not a fsel.
8092   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
8093       !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
8094     return Op;
8095 
8096   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8097 
8098   EVT ResVT = Op.getValueType();
8099   EVT CmpVT = Op.getOperand(0).getValueType();
8100   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8101   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
8102   SDLoc dl(Op);
8103   SDNodeFlags Flags = Op.getNode()->getFlags();
8104 
8105   // We have xsmaxcdp/xsmincdp which are OK to emit even in the
8106   // presence of infinities.
8107   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8108     switch (CC) {
8109     default:
8110       break;
8111     case ISD::SETOGT:
8112     case ISD::SETGT:
8113       return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
8114     case ISD::SETOLT:
8115     case ISD::SETLT:
8116       return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
8117     }
8118   }
8119 
  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0, the xsmaxcdp/xsmincdp cases are already handled above.
8124   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8125       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8126     return Op;
8127 
8128   // If the RHS of the comparison is a 0.0, we don't need to do the
8129   // subtraction at all.
8130   SDValue Sel1;
8131   if (isFloatingPointZero(RHS))
8132     switch (CC) {
8133     default: break;       // SETUO etc aren't handled by fsel.
8134     case ISD::SETNE:
8135       std::swap(TV, FV);
8136       LLVM_FALLTHROUGH;
8137     case ISD::SETEQ:
8138       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8139         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8140       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8141       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8142         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8143       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8144                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8145     case ISD::SETULT:
8146     case ISD::SETLT:
8147       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8148       LLVM_FALLTHROUGH;
8149     case ISD::SETOGE:
8150     case ISD::SETGE:
8151       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8152         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8153       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8154     case ISD::SETUGT:
8155     case ISD::SETGT:
8156       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8157       LLVM_FALLTHROUGH;
8158     case ISD::SETOLE:
8159     case ISD::SETLE:
8160       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8161         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8162       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8163                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8164     }
8165 
8166   SDValue Cmp;
8167   switch (CC) {
8168   default: break;       // SETUO etc aren't handled by fsel.
8169   case ISD::SETNE:
8170     std::swap(TV, FV);
8171     LLVM_FALLTHROUGH;
8172   case ISD::SETEQ:
8173     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8174     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8175       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8176     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8177     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8178       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8179     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8180                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8181   case ISD::SETULT:
8182   case ISD::SETLT:
8183     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8184     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8185       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8186     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8187   case ISD::SETOGE:
8188   case ISD::SETGE:
8189     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8190     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8191       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8192     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8193   case ISD::SETUGT:
8194   case ISD::SETGT:
8195     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8196     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8197       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8198     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8199   case ISD::SETOLE:
8200   case ISD::SETLE:
8201     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8202     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8203       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8204     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8205   }
8206   return Op;
8207 }
8208 
8209 static unsigned getPPCStrictOpcode(unsigned Opc) {
8210   switch (Opc) {
8211   default:
8212     llvm_unreachable("No strict version of this opcode!");
8213   case PPCISD::FCTIDZ:
8214     return PPCISD::STRICT_FCTIDZ;
8215   case PPCISD::FCTIWZ:
8216     return PPCISD::STRICT_FCTIWZ;
8217   case PPCISD::FCTIDUZ:
8218     return PPCISD::STRICT_FCTIDUZ;
8219   case PPCISD::FCTIWUZ:
8220     return PPCISD::STRICT_FCTIWUZ;
8221   case PPCISD::FCFID:
8222     return PPCISD::STRICT_FCFID;
8223   case PPCISD::FCFIDU:
8224     return PPCISD::STRICT_FCFIDU;
8225   case PPCISD::FCFIDS:
8226     return PPCISD::STRICT_FCFIDS;
8227   case PPCISD::FCFIDUS:
8228     return PPCISD::STRICT_FCFIDUS;
8229   }
8230 }
8231 
8232 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8233                               const PPCSubtarget &Subtarget) {
8234   SDLoc dl(Op);
8235   bool IsStrict = Op->isStrictFPOpcode();
8236   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8237                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8238 
8239   // TODO: Any other flags to propagate?
8240   SDNodeFlags Flags;
8241   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8242 
8243   // For strict nodes, source is the second operand.
8244   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8245   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8246   assert(Src.getValueType().isFloatingPoint());
8247   if (Src.getValueType() == MVT::f32) {
8248     if (IsStrict) {
8249       Src =
8250           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8251                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8252       Chain = Src.getValue(1);
8253     } else
8254       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8255   }
8256   SDValue Conv;
8257   unsigned Opc = ISD::DELETED_NODE;
8258   switch (Op.getSimpleValueType().SimpleTy) {
8259   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8260   case MVT::i32:
8261     Opc = IsSigned ? PPCISD::FCTIWZ
8262                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8263     break;
8264   case MVT::i64:
8265     assert((IsSigned || Subtarget.hasFPCVT()) &&
8266            "i64 FP_TO_UINT is supported only with FPCVT");
8267     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8268   }
8269   if (IsStrict) {
8270     Opc = getPPCStrictOpcode(Opc);
8271     Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8272                        {Chain, Src}, Flags);
8273   } else {
8274     Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8275   }
8276   return Conv;
8277 }
8278 
8279 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8280                                                SelectionDAG &DAG,
8281                                                const SDLoc &dl) const {
8282   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8283   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8284                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8285   bool IsStrict = Op->isStrictFPOpcode();
8286 
8287   // Convert the FP value to an int value through memory.
8288   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8289                   (IsSigned || Subtarget.hasFPCVT());
8290   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8291   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8292   MachinePointerInfo MPI =
8293       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8294 
8295   // Emit a store to the stack slot.
8296   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8297   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8298   if (i32Stack) {
8299     MachineFunction &MF = DAG.getMachineFunction();
8300     Alignment = Align(4);
8301     MachineMemOperand *MMO =
8302         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8303     SDValue Ops[] = { Chain, Tmp, FIPtr };
8304     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8305               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8306   } else
8307     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8308 
  // The result is a load from the stack slot.  If loading 4 bytes, make sure
  // to add in a bias on big-endian targets.
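  // For example (a sketch): an f64 -> i32 conversion without STFIWX stores the
  // full 8-byte fctiwz result, and on a big-endian target the i32 result lives
  // in bytes 4-7 of that slot, hence the 4-byte bias below.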
8311   if (Op.getValueType() == MVT::i32 && !i32Stack) {
8312     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8313                         DAG.getConstant(4, dl, FIPtr.getValueType()));
8314     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8315   }
8316 
8317   RLI.Chain = Chain;
8318   RLI.Ptr = FIPtr;
8319   RLI.MPI = MPI;
8320   RLI.Alignment = Alignment;
8321 }
8322 
8323 /// Custom lowers floating point to integer conversions to use
8324 /// the direct move instructions available in ISA 2.07 to avoid the
8325 /// need for load/store combinations.
8326 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8327                                                     SelectionDAG &DAG,
8328                                                     const SDLoc &dl) const {
8329   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8330   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8331   if (Op->isStrictFPOpcode())
8332     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8333   else
8334     return Mov;
8335 }
8336 
8337 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8338                                           const SDLoc &dl) const {
8339   bool IsStrict = Op->isStrictFPOpcode();
8340   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8341                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8342   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8343   EVT SrcVT = Src.getValueType();
8344   EVT DstVT = Op.getValueType();
8345 
8346   // FP to INT conversions are legal for f128.
8347   if (SrcVT == MVT::f128)
8348     return Op;
8349 
8350   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8351   // PPC (the libcall is not available).
8352   if (SrcVT == MVT::ppcf128) {
8353     if (DstVT == MVT::i32) {
8354       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8355       // set other fast-math flags to FP operations in both strict and
8356       // non-strict cases. (FP_TO_SINT, FSUB)
8357       SDNodeFlags Flags;
8358       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8359 
8360       if (IsSigned) {
8361         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8362                                  DAG.getIntPtrConstant(0, dl));
8363         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8364                                  DAG.getIntPtrConstant(1, dl));
8365 
8366         // Add the two halves of the long double in round-to-zero mode, and use
8367         // a smaller FP_TO_SINT.
8368         if (IsStrict) {
8369           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8370                                     DAG.getVTList(MVT::f64, MVT::Other),
8371                                     {Op.getOperand(0), Lo, Hi}, Flags);
8372           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8373                              DAG.getVTList(MVT::i32, MVT::Other),
8374                              {Res.getValue(1), Res}, Flags);
8375         } else {
8376           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8377           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8378         }
8379       } else {
8380         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8381         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8382         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8383         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8384         if (IsStrict) {
8385           // Sel = Src < 0x80000000
8386           // FltOfs = select Sel, 0.0, 0x80000000
8387           // IntOfs = select Sel, 0, 0x80000000
8388           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8389           SDValue Chain = Op.getOperand(0);
8390           EVT SetCCVT =
8391               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8392           EVT DstSetCCVT =
8393               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8394           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8395                                      Chain, true);
8396           Chain = Sel.getValue(1);
8397 
8398           SDValue FltOfs = DAG.getSelect(
8399               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8400           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8401 
8402           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8403                                     DAG.getVTList(SrcVT, MVT::Other),
8404                                     {Chain, Src, FltOfs}, Flags);
8405           Chain = Val.getValue(1);
8406           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8407                                      DAG.getVTList(DstVT, MVT::Other),
8408                                      {Chain, Val}, Flags);
8409           Chain = SInt.getValue(1);
8410           SDValue IntOfs = DAG.getSelect(
8411               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8412           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8413           return DAG.getMergeValues({Result, Chain}, dl);
8414         } else {
8415           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8416           // FIXME: generated code sucks.
8417           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8418           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8419           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8420           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8421           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8422         }
8423       }
8424     }
8425 
8426     return SDValue();
8427   }
8428 
8429   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8430     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8431 
8432   ReuseLoadInfo RLI;
8433   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8434 
8435   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8436                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8437 }
8438 
8439 // We're trying to insert a regular store, S, and then a load, L. If the
8440 // incoming value, O, is a load, we might just be able to have our load use the
8441 // address used by O. However, we don't know if anything else will store to
8442 // that address before we can load from it. To prevent this situation, we need
8443 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8444 // the same chain operand as O, we create a token factor from the chain results
8445 // of O and L, and we replace all uses of O's chain result with that token
8446 // factor (see spliceIntoChain below for this last part).
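//
// Illustration (a sketch): if O's chain operand is Ch, the new load L also
// uses Ch as its chain operand, and a TokenFactor of O's and L's chain results
// replaces all former uses of O's chain result.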
8447 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8448                                             ReuseLoadInfo &RLI,
8449                                             SelectionDAG &DAG,
8450                                             ISD::LoadExtType ET) const {
8451   // Conservatively skip reusing for constrained FP nodes.
8452   if (Op->isStrictFPOpcode())
8453     return false;
8454 
8455   SDLoc dl(Op);
8456   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8457                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8458   if (ET == ISD::NON_EXTLOAD &&
8459       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8460       isOperationLegalOrCustom(Op.getOpcode(),
8461                                Op.getOperand(0).getValueType())) {
8462 
8463     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8464     return true;
8465   }
8466 
8467   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8468   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8469       LD->isNonTemporal())
8470     return false;
8471   if (LD->getMemoryVT() != MemVT)
8472     return false;
8473 
  // If the result of the load is an illegal type, then we can't build a
  // valid chain for reuse since the legalised loads and token factor node that
  // ties the legalised loads together use a different output chain than the
  // illegal load.
8478   if (!isTypeLegal(LD->getValueType(0)))
8479     return false;
8480 
8481   RLI.Ptr = LD->getBasePtr();
8482   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8483     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8484            "Non-pre-inc AM on PPC?");
8485     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8486                           LD->getOffset());
8487   }
8488 
8489   RLI.Chain = LD->getChain();
8490   RLI.MPI = LD->getPointerInfo();
8491   RLI.IsDereferenceable = LD->isDereferenceable();
8492   RLI.IsInvariant = LD->isInvariant();
8493   RLI.Alignment = LD->getAlign();
8494   RLI.AAInfo = LD->getAAInfo();
8495   RLI.Ranges = LD->getRanges();
8496 
8497   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8498   return true;
8499 }
8500 
8501 // Given the head of the old chain, ResChain, insert a token factor containing
8502 // it and NewResChain, and make users of ResChain now be users of that token
8503 // factor.
8504 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8505 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8506                                         SDValue NewResChain,
8507                                         SelectionDAG &DAG) const {
8508   if (!ResChain)
8509     return;
8510 
8511   SDLoc dl(NewResChain);
8512 
8513   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8514                            NewResChain, DAG.getUNDEF(MVT::Other));
8515   assert(TF.getNode() != NewResChain.getNode() &&
8516          "A new TF really is required here");
8517 
8518   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8519   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8520 }
8521 
/// Analyze the profitability of a direct move: prefer a plain float load over
/// an integer load plus a direct move when there is no integer use of the
/// loaded value.
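///
/// For example (a sketch): if the only user of a loaded i32 value is a
/// sint_to_fp, emitting an FP-side load avoids the GPR-to-VSR move entirely,
/// so this returns false.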
8525 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8526   SDNode *Origin = Op.getOperand(0).getNode();
8527   if (Origin->getOpcode() != ISD::LOAD)
8528     return true;
8529 
  // If there is no LXSIBZX/LXSIHZX (e.g. on Power8), prefer a direct move if
  // the memory size is 1 or 2 bytes.
8532   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8533   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8534     return true;
8535 
8536   for (SDNode::use_iterator UI = Origin->use_begin(),
8537                             UE = Origin->use_end();
8538        UI != UE; ++UI) {
8539 
8540     // Only look at the users of the loaded value.
8541     if (UI.getUse().get().getResNo() != 0)
8542       continue;
8543 
8544     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8545         UI->getOpcode() != ISD::UINT_TO_FP &&
8546         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8547         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8548       return true;
8549   }
8550 
8551   return false;
8552 }
8553 
8554 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8555                               const PPCSubtarget &Subtarget,
8556                               SDValue Chain = SDValue()) {
8557   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8558                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8559   SDLoc dl(Op);
8560 
8561   // TODO: Any other flags to propagate?
8562   SDNodeFlags Flags;
8563   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8564 
8565   // If we have FCFIDS, then use it when converting to single-precision.
8566   // Otherwise, convert to double-precision and then round.
8567   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8568   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8569                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8570   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8571   if (Op->isStrictFPOpcode()) {
8572     if (!Chain)
8573       Chain = Op.getOperand(0);
8574     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8575                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8576   } else
8577     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8578 }
8579 
8580 /// Custom lowers integer to floating point conversions to use
8581 /// the direct move instructions available in ISA 2.07 to avoid the
8582 /// need for load/store combinations.
8583 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8584                                                     SelectionDAG &DAG,
8585                                                     const SDLoc &dl) const {
8586   assert((Op.getValueType() == MVT::f32 ||
8587           Op.getValueType() == MVT::f64) &&
8588          "Invalid floating point type as target of conversion");
8589   assert(Subtarget.hasFPCVT() &&
8590          "Int to FP conversions with direct moves require FPCVT");
8591   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8592   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8593   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8594                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8595   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8596   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8597   return convertIntToFP(Op, Mov, DAG, Subtarget);
8598 }
8599 
8600 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8601 
8602   EVT VecVT = Vec.getValueType();
8603   assert(VecVT.isVector() && "Expected a vector type.");
8604   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
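
  // For example (a sketch): a v4i8 input is widened to v16i8 by concatenating
  // it with three undef v4i8 vectors.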
8605 
8606   EVT EltVT = VecVT.getVectorElementType();
8607   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8608   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8609 
8610   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8611   SmallVector<SDValue, 16> Ops(NumConcat);
8612   Ops[0] = Vec;
8613   SDValue UndefVec = DAG.getUNDEF(VecVT);
8614   for (unsigned i = 1; i < NumConcat; ++i)
8615     Ops[i] = UndefVec;
8616 
8617   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8618 }
8619 
8620 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8621                                                 const SDLoc &dl) const {
8622   bool IsStrict = Op->isStrictFPOpcode();
8623   unsigned Opc = Op.getOpcode();
8624   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8625   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8626           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8627          "Unexpected conversion type");
8628   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8629          "Supports conversions to v2f64/v4f32 only.");
8630 
8631   // TODO: Any other flags to propagate?
8632   SDNodeFlags Flags;
8633   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8634 
8635   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8636   bool FourEltRes = Op.getValueType() == MVT::v4f32;
8637 
8638   SDValue Wide = widenVec(DAG, Src, dl);
8639   EVT WideVT = Wide.getValueType();
8640   unsigned WideNumElts = WideVT.getVectorNumElements();
8641   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8642 
8643   SmallVector<int, 16> ShuffV;
8644   for (unsigned i = 0; i < WideNumElts; ++i)
8645     ShuffV.push_back(i + WideNumElts);
8646 
8647   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8648   int SaveElts = FourEltRes ? 4 : 2;
8649   if (Subtarget.isLittleEndian())
8650     for (int i = 0; i < SaveElts; i++)
8651       ShuffV[i * Stride] = i;
8652   else
8653     for (int i = 1; i <= SaveElts; i++)
8654       ShuffV[i * Stride - 1] = i - 1;
8655 
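  // For example, converting a source widened to v8i16 into v4f32 on a
  // little-endian target uses ShuffV = <0, 9, 1, 11, 2, 13, 3, 15>: each
  // 32-bit lane gets a source element in its low half-word and an element of
  // ShuffleSrc2 (zero for unsigned, undef for signed) in its high half-word.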
8656   SDValue ShuffleSrc2 =
8657       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8658   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8659 
8660   SDValue Extend;
8661   if (SignedConv) {
8662     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8663     EVT ExtVT = Src.getValueType();
8664     if (Subtarget.hasP9Altivec())
8665       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8666                                IntermediateVT.getVectorNumElements());
8667 
8668     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8669                          DAG.getValueType(ExtVT));
8670   } else
8671     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8672 
8673   if (IsStrict)
8674     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8675                        {Op.getOperand(0), Extend}, Flags);
8676 
8677   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8678 }
8679 
8680 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8681                                           SelectionDAG &DAG) const {
8682   SDLoc dl(Op);
8683   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8684                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8685   bool IsStrict = Op->isStrictFPOpcode();
8686   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8687   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8688 
8689   // TODO: Any other flags to propagate?
8690   SDNodeFlags Flags;
8691   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8692 
8693   EVT InVT = Src.getValueType();
8694   EVT OutVT = Op.getValueType();
8695   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8696       isOperationCustom(Op.getOpcode(), InVT))
8697     return LowerINT_TO_FPVector(Op, DAG, dl);
8698 
8699   // Conversions to f128 are legal.
8700   if (Op.getValueType() == MVT::f128)
8701     return Op;
8702 
8703   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8704   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8705     return SDValue();
8706 
8707   if (Src.getValueType() == MVT::i1)
8708     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8709                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
8710                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
8711 
  // If we have direct moves, we can do the conversion entirely in registers
  // and skip the store/load. However, without FPCVT we can't do most
  // conversions.
8714   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8715       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8716     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8717 
8718   assert((IsSigned || Subtarget.hasFPCVT()) &&
8719          "UINT_TO_FP is supported only with FPCVT");
8720 
8721   if (Src.getValueType() == MVT::i64) {
8722     SDValue SINT = Src;
8723     // When converting to single-precision, we actually need to convert
8724     // to double-precision first and then round to single-precision.
8725     // To avoid double-rounding effects during that operation, we have
8726     // to prepare the input operand.  Bits that might be truncated when
8727     // converting to double-precision are replaced by a bit that won't
8728     // be lost at this stage, but is below the single-precision rounding
8729     // position.
8730     //
8731     // However, if -enable-unsafe-fp-math is in effect, accept double
8732     // rounding to avoid the extra overhead.
8733     if (Op.getValueType() == MVT::f32 &&
8734         !Subtarget.hasFPCVT() &&
8735         !DAG.getTarget().Options.UnsafeFPMath) {
8736 
8737       // Twiddle input to make sure the low 11 bits are zero.  (If this
8738       // is the case, we are guaranteed the value will fit into the 53 bit
8739       // mantissa of an IEEE double-precision value without rounding.)
8740       // If any of those low 11 bits were not zero originally, make sure
8741       // bit 12 (value 2048) is set instead, so that the final rounding
8742       // to single-precision gets the correct result.
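      // For example, the four operations below turn a value ending in ...07FF
      // into one ending in ...0800 and leave a value already ending in ...0800
      // unchanged: the low 11 bits are cleared, and the 0x800 bit ends up set
      // if it or any of the cleared bits was set.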
8743       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8744                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8745       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8746                           Round, DAG.getConstant(2047, dl, MVT::i64));
8747       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8748       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8749                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8750 
8751       // However, we cannot use that value unconditionally: if the magnitude
8752       // of the input value is small, the bit-twiddling we did above might
8753       // end up visibly changing the output.  Fortunately, in that case, we
8754       // don't need to twiddle bits since the original input will convert
8755       // exactly to double-precision floating-point already.  Therefore,
8756       // construct a conditional to use the original value if the top 11
8757       // bits are all sign-bit copies, and use the rounded value computed
8758       // above otherwise.
8759       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8760                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8761       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8762                          Cond, DAG.getConstant(1, dl, MVT::i64));
8763       Cond = DAG.getSetCC(
8764           dl,
8765           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8766           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8767 
8768       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8769     }
8770 
8771     ReuseLoadInfo RLI;
8772     SDValue Bits;
8773 
8774     MachineFunction &MF = DAG.getMachineFunction();
8775     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8776       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8777                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8778       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8779     } else if (Subtarget.hasLFIWAX() &&
8780                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8781       MachineMemOperand *MMO =
8782         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8783                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8784       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8785       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8786                                      DAG.getVTList(MVT::f64, MVT::Other),
8787                                      Ops, MVT::i32, MMO);
8788       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8789     } else if (Subtarget.hasFPCVT() &&
8790                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8791       MachineMemOperand *MMO =
8792         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8793                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8794       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8795       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8796                                      DAG.getVTList(MVT::f64, MVT::Other),
8797                                      Ops, MVT::i32, MMO);
8798       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8799     } else if (((Subtarget.hasLFIWAX() &&
8800                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8801                 (Subtarget.hasFPCVT() &&
8802                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8803                SINT.getOperand(0).getValueType() == MVT::i32) {
8804       MachineFrameInfo &MFI = MF.getFrameInfo();
8805       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8806 
8807       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8808       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8809 
8810       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8811                                    MachinePointerInfo::getFixedStack(
8812                                        DAG.getMachineFunction(), FrameIdx));
8813       Chain = Store;
8814 
8815       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8816              "Expected an i32 store");
8817 
8818       RLI.Ptr = FIdx;
8819       RLI.Chain = Chain;
8820       RLI.MPI =
8821           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8822       RLI.Alignment = Align(4);
8823 
8824       MachineMemOperand *MMO =
8825         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8826                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8827       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8828       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8829                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8830                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8831                                      Ops, MVT::i32, MMO);
8832       Chain = Bits.getValue(1);
8833     } else
8834       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8835 
8836     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8837     if (IsStrict)
8838       Chain = FP.getValue(1);
8839 
8840     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8841       if (IsStrict)
8842         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8843                          DAG.getVTList(MVT::f32, MVT::Other),
8844                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8845       else
8846         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8847                          DAG.getIntPtrConstant(0, dl));
8848     }
8849     return FP;
8850   }
8851 
8852   assert(Src.getValueType() == MVT::i32 &&
8853          "Unhandled INT_TO_FP type in custom expander!");
8854   // Since we only generate this in 64-bit mode, we can take advantage of
8855   // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the whole 64-bit value to the stack
  // slot, and then lfd and fcfid it.
8858   MachineFunction &MF = DAG.getMachineFunction();
8859   MachineFrameInfo &MFI = MF.getFrameInfo();
8860   EVT PtrVT = getPointerTy(MF.getDataLayout());
8861 
8862   SDValue Ld;
8863   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8864     ReuseLoadInfo RLI;
8865     bool ReusingLoad;
8866     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8867       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8868       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8869 
8870       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8871                                    MachinePointerInfo::getFixedStack(
8872                                        DAG.getMachineFunction(), FrameIdx));
8873       Chain = Store;
8874 
8875       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8876              "Expected an i32 store");
8877 
8878       RLI.Ptr = FIdx;
8879       RLI.Chain = Chain;
8880       RLI.MPI =
8881           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8882       RLI.Alignment = Align(4);
8883     }
8884 
8885     MachineMemOperand *MMO =
8886       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8887                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8888     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8889     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8890                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8891                                  MVT::i32, MMO);
8892     Chain = Ld.getValue(1);
8893     if (ReusingLoad)
8894       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8895   } else {
8896     assert(Subtarget.isPPC64() &&
8897            "i32->FP without LFIWAX supported only on PPC64");
8898 
8899     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8900     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8901 
8902     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8903 
8904     // STD the extended value into the stack slot.
8905     SDValue Store = DAG.getStore(
8906         Chain, dl, Ext64, FIdx,
8907         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8908     Chain = Store;
8909 
8910     // Load the value as a double.
8911     Ld = DAG.getLoad(
8912         MVT::f64, dl, Chain, FIdx,
8913         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8914     Chain = Ld.getValue(1);
8915   }
8916 
8917   // FCFID it and return it.
8918   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8919   if (IsStrict)
8920     Chain = FP.getValue(1);
8921   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8922     if (IsStrict)
8923       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8924                        DAG.getVTList(MVT::f32, MVT::Other),
8925                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8926     else
8927       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8928                        DAG.getIntPtrConstant(0, dl));
8929   }
8930   return FP;
8931 }
8932 
8933 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8934                                             SelectionDAG &DAG) const {
8935   SDLoc dl(Op);
8936   /*
   The rounding mode is in bits 30:31 of the FPSCR, and has the following
8938    settings:
8939      00 Round to nearest
8940      01 Round to 0
8941      10 Round to +inf
8942      11 Round to -inf
8943 
8944   FLT_ROUNDS, on the other hand, expects the following:
8945     -1 Undefined
8946      0 Round to 0
8947      1 Round to nearest
8948      2 Round to +inf
8949      3 Round to -inf
8950 
8951   To perform the conversion, we do:
8952     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
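
  For example, FPSCR rounding bits 0b10 (round to +inf) map to
    (2 & 0x3) ^ ((~2 & 0x3) >> 1) = 2 ^ 0 = 2,
  which is the FLT_ROUNDS encoding of round to +inf.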
8953   */
8954 
8955   MachineFunction &MF = DAG.getMachineFunction();
8956   EVT VT = Op.getValueType();
8957   EVT PtrVT = getPointerTy(MF.getDataLayout());
8958 
8959   // Save FP Control Word to register
8960   SDValue Chain = Op.getOperand(0);
8961   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8962   Chain = MFFS.getValue(1);
8963 
8964   // Save FP register to stack slot
8965   int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8966   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8967   Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8968 
8969   // Load FP Control Word from low 32 bits of stack slot.
8970   SDValue Four = DAG.getConstant(4, dl, PtrVT);
8971   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8972   SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8973   Chain = CWD.getValue(1);
8974 
8975   // Transform as necessary
8976   SDValue CWD1 =
8977     DAG.getNode(ISD::AND, dl, MVT::i32,
8978                 CWD, DAG.getConstant(3, dl, MVT::i32));
8979   SDValue CWD2 =
8980     DAG.getNode(ISD::SRL, dl, MVT::i32,
8981                 DAG.getNode(ISD::AND, dl, MVT::i32,
8982                             DAG.getNode(ISD::XOR, dl, MVT::i32,
8983                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
8984                             DAG.getConstant(3, dl, MVT::i32)),
8985                 DAG.getConstant(1, dl, MVT::i32));
8986 
8987   SDValue RetVal =
8988     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8989 
8990   RetVal =
8991       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8992                   dl, VT, RetVal);
8993 
8994   return DAG.getMergeValues({RetVal, Chain}, dl);
8995 }
8996 
8997 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8998   EVT VT = Op.getValueType();
8999   unsigned BitWidth = VT.getSizeInBits();
9000   SDLoc dl(Op);
9001   assert(Op.getNumOperands() == 3 &&
9002          VT == Op.getOperand(1).getValueType() &&
9003          "Unexpected SHL!");
9004 
9005   // Expand into a bunch of logical ops.  Note that these ops
9006   // depend on the PPC behavior for oversized shift amounts.
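  // For example, with 32-bit parts and Amt = 40, the Hi << Amt and
  // Lo >> (32 - Amt) terms are oversized shifts that yield zero, while
  // Lo << (Amt - 32) = Lo << 8 supplies OutHi and OutLo = Lo << 40 is zero,
  // exactly the result of shifting the 64-bit value left by 40.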
9007   SDValue Lo = Op.getOperand(0);
9008   SDValue Hi = Op.getOperand(1);
9009   SDValue Amt = Op.getOperand(2);
9010   EVT AmtVT = Amt.getValueType();
9011 
9012   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9013                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9014   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9015   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9016   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9017   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9018                              DAG.getConstant(-BitWidth, dl, AmtVT));
9019   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9020   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9021   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9022   SDValue OutOps[] = { OutLo, OutHi };
9023   return DAG.getMergeValues(OutOps, dl);
9024 }
9025 
9026 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9027   EVT VT = Op.getValueType();
9028   SDLoc dl(Op);
9029   unsigned BitWidth = VT.getSizeInBits();
9030   assert(Op.getNumOperands() == 3 &&
9031          VT == Op.getOperand(1).getValueType() &&
9032          "Unexpected SRL!");
9033 
9034   // Expand into a bunch of logical ops.  Note that these ops
9035   // depend on the PPC behavior for oversized shift amounts.
9036   SDValue Lo = Op.getOperand(0);
9037   SDValue Hi = Op.getOperand(1);
9038   SDValue Amt = Op.getOperand(2);
9039   EVT AmtVT = Amt.getValueType();
9040 
9041   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9042                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9043   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9044   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9045   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9046   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9047                              DAG.getConstant(-BitWidth, dl, AmtVT));
9048   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9049   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9050   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9051   SDValue OutOps[] = { OutLo, OutHi };
9052   return DAG.getMergeValues(OutOps, dl);
9053 }
9054 
9055 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9056   SDLoc dl(Op);
9057   EVT VT = Op.getValueType();
9058   unsigned BitWidth = VT.getSizeInBits();
9059   assert(Op.getNumOperands() == 3 &&
9060          VT == Op.getOperand(1).getValueType() &&
9061          "Unexpected SRA!");
9062 
9063   // Expand into a bunch of logical ops, followed by a select_cc.
9064   SDValue Lo = Op.getOperand(0);
9065   SDValue Hi = Op.getOperand(1);
9066   SDValue Amt = Op.getOperand(2);
9067   EVT AmtVT = Amt.getValueType();
9068 
9069   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9070                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9071   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9072   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9073   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9074   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9075                              DAG.getConstant(-BitWidth, dl, AmtVT));
9076   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9077   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9078   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9079                                   Tmp4, Tmp6, ISD::SETLE);
9080   SDValue OutOps[] = { OutLo, OutHi };
9081   return DAG.getMergeValues(OutOps, dl);
9082 }
9083 
9084 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9085                                             SelectionDAG &DAG) const {
9086   SDLoc dl(Op);
9087   EVT VT = Op.getValueType();
9088   unsigned BitWidth = VT.getSizeInBits();
9089 
9090   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9091   SDValue X = Op.getOperand(0);
9092   SDValue Y = Op.getOperand(1);
9093   SDValue Z = Op.getOperand(2);
9094   EVT AmtVT = Z.getValueType();
9095 
9096   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9097   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9098   // This is simpler than TargetLowering::expandFunnelShift because we can rely
9099   // on PowerPC shift by BW being well defined.
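  // For example, fshl with BitWidth = 64 and Z = 0 gives SubZ = 64; the PPC
  // shift Y >> 64 is well defined and yields zero, so the result is just X,
  // as required for a funnel shift by zero.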
9100   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9101                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
9102   SDValue SubZ =
9103       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9104   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9105   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9106   return DAG.getNode(ISD::OR, dl, VT, X, Y);
9107 }
9108 
9109 //===----------------------------------------------------------------------===//
9110 // Vector related lowering.
9111 //
9112 
9113 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9114 /// element size of SplatSize. Cast the result to VT.
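/// For example, Val = 5 with SplatSize = 2 builds a v8i16 splat of 5 and
/// bitcasts it to VT if a specific result type was requested.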
9115 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9116                                       SelectionDAG &DAG, const SDLoc &dl) {
9117   static const MVT VTys[] = { // canonical VT to use for each size.
9118     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9119   };
9120 
9121   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9122 
  // Canonicalize a splat of all ones to a byte splat of 0xFF.
9124   if (Val == ((1LU << (SplatSize * 8)) - 1)) {
9125     SplatSize = 1;
9126     Val = 0xFF;
9127   }
9128 
9129   EVT CanonicalVT = VTys[SplatSize-1];
9130 
9131   // Build a canonical splat for this value.
9132   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9133 }
9134 
9135 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9136 /// specified intrinsic ID.
9137 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9138                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9139   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9140   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9141                      DAG.getConstant(IID, dl, MVT::i32), Op);
9142 }
9143 
9144 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9145 /// specified intrinsic ID.
9146 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9147                                 SelectionDAG &DAG, const SDLoc &dl,
9148                                 EVT DestVT = MVT::Other) {
9149   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9150   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9151                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9152 }
9153 
9154 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9155 /// specified intrinsic ID.
9156 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9157                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9158                                 EVT DestVT = MVT::Other) {
9159   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9160   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9161                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9162 }
9163 
9164 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9165 /// amount.  The result has the specified value type.
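/// For example, Amt = 4 builds a v16i8 shuffle with mask <4, 5, ..., 19>,
/// i.e. bytes 4-19 of the 32-byte LHS||RHS concatenation, matching vsldoi
/// with a shift of 4 bytes.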
9166 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9167                            SelectionDAG &DAG, const SDLoc &dl) {
9168   // Force LHS/RHS to be the right type.
9169   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9170   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9171 
9172   int Ops[16];
9173   for (unsigned i = 0; i != 16; ++i)
9174     Ops[i] = i + Amt;
9175   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9176   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9177 }
9178 
9179 /// Do we have an efficient pattern in a .td file for this node?
9180 ///
9181 /// \param V - pointer to the BuildVectorSDNode being matched
9182 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9183 ///
9184 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9185 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9186 /// the opposite is true (expansion is beneficial) are:
9187 /// - The node builds a vector out of integers that are not 32 or 64-bits
9188 /// - The node builds a vector out of constants
9189 /// - The node is a "load-and-splat"
9190 /// In all other cases, we will choose to keep the BUILD_VECTOR.
9191 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9192                                             bool HasDirectMove,
9193                                             bool HasP8Vector) {
9194   EVT VecVT = V->getValueType(0);
9195   bool RightType = VecVT == MVT::v2f64 ||
9196     (HasP8Vector && VecVT == MVT::v4f32) ||
9197     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9198   if (!RightType)
9199     return false;
9200 
9201   bool IsSplat = true;
9202   bool IsLoad = false;
9203   SDValue Op0 = V->getOperand(0);
9204 
9205   // This function is called in a block that confirms the node is not a constant
9206   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9207   // different constants.
9208   if (V->isConstant())
9209     return false;
9210   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9211     if (V->getOperand(i).isUndef())
9212       return false;
9213     // We want to expand nodes that represent load-and-splat even if the
9214     // loaded value is a floating point truncation or conversion to int.
9215     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9216         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9217          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9218         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9219          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9220         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9221          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9222       IsLoad = true;
9223     // If the operands are different or the input is not a load and has more
9224     // uses than just this BV node, then it isn't a splat.
9225     if (V->getOperand(i) != Op0 ||
9226         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9227       IsSplat = false;
9228   }
9229   return !(IsSplat && IsLoad);
9230 }
9231 
9232 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9233 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9234 
9235   SDLoc dl(Op);
9236   SDValue Op0 = Op->getOperand(0);
9237 
9238   if ((Op.getValueType() != MVT::f128) ||
9239       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9240       (Op0.getOperand(0).getValueType() != MVT::i64) ||
9241       (Op0.getOperand(1).getValueType() != MVT::i64))
9242     return SDValue();
9243 
9244   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9245                      Op0.getOperand(1));
9246 }
9247 
9248 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9249   const SDValue *InputLoad = &Op;
9250   if (InputLoad->getOpcode() == ISD::BITCAST)
9251     InputLoad = &InputLoad->getOperand(0);
9252   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9253       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9254     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9255     InputLoad = &InputLoad->getOperand(0);
9256   }
9257   if (InputLoad->getOpcode() != ISD::LOAD)
9258     return nullptr;
9259   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9260   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9261 }
9262 
9263 // Convert the argument APFloat to a single precision APFloat if there is no
9264 // loss in information during the conversion to single precision APFloat and the
9265 // resulting number is not a denormal number. Return true if successful.
9266 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9267   APFloat APFloatToConvert = ArgAPFloat;
9268   bool LosesInfo = true;
9269   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9270                            &LosesInfo);
9271   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9272   if (Success)
9273     ArgAPFloat = APFloatToConvert;
9274   return Success;
9275 }
9276 
9277 // Bitcast the argument APInt to a double and convert it to a single precision
9278 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9279 // argument if there is no loss in information during the conversion from
9280 // double to single precision APFloat and the resulting number is not a denormal
9281 // number. Return true if successful.
9282 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9283   double DpValue = ArgAPInt.bitsToDouble();
9284   APFloat APFloatDp(DpValue);
9285   bool Success = convertToNonDenormSingle(APFloatDp);
9286   if (Success)
9287     ArgAPInt = APFloatDp.bitcastToAPInt();
9288   return Success;
9289 }
9290 
9291 // If this is a case we can't handle, return null and let the default
9292 // expansion code take care of it.  If we CAN select this case, and if it
9293 // selects to a single instruction, return Op.  Otherwise, if we can codegen
9294 // this case more efficiently than a constant pool load, lower it to the
9295 // sequence of ops that should be used.
9296 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9297                                              SelectionDAG &DAG) const {
9298   SDLoc dl(Op);
9299   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9300   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9301 
9302   // Check if this is a splat of a constant value.
9303   APInt APSplatBits, APSplatUndef;
9304   unsigned SplatBitSize;
9305   bool HasAnyUndefs;
9306   bool BVNIsConstantSplat =
9307       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9308                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9309 
9310   // If it is a splat of a double, check if we can shrink it to a 32 bit
9311   // non-denormal float which when converted back to double gives us the same
9312   // double. This is to exploit the XXSPLTIDP instruction.
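  // For example, a v2f64 splat of 1.0 (0x3FF0000000000000) shrinks losslessly
  // to the single-precision pattern 0x3F800000, which XXSPLTIDP expands back
  // to the original double in each doubleword.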
9313   if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9314       (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9315       convertToNonDenormSingle(APSplatBits)) {
9316     SDValue SplatNode = DAG.getNode(
9317         PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9318         DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9319     return DAG.getBitcast(Op.getValueType(), SplatNode);
9320   }
9321 
9322   if (!BVNIsConstantSplat || SplatBitSize > 32) {
9323 
9324     bool IsPermutedLoad = false;
9325     const SDValue *InputLoad =
9326         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9327     // Handle load-and-splat patterns as we have instructions that will do this
9328     // in one go.
9329     if (InputLoad && DAG.isSplatValue(Op, true)) {
9330       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9331 
9332       // We have handling for 4 and 8 byte elements.
9333       unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9334 
      // To check that this load has only a single user, we have to check for
      // vector width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value).
9338       unsigned NumUsesOfInputLD = 128 / ElementSize;
9339       for (SDValue BVInOp : Op->ops())
9340         if (BVInOp.isUndef())
9341           NumUsesOfInputLD--;
9342       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9343       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9344           ((Subtarget.hasVSX() && ElementSize == 64) ||
9345            (Subtarget.hasP9Vector() && ElementSize == 32))) {
9346         SDValue Ops[] = {
9347           LD->getChain(),    // Chain
9348           LD->getBasePtr(),  // Ptr
9349           DAG.getValueType(Op.getValueType()) // VT
9350         };
9351         SDValue LdSplt = DAG.getMemIntrinsicNode(
9352             PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
9353             Ops, LD->getMemoryVT(), LD->getMemOperand());
9354         // Replace all uses of the output chain of the original load with the
9355         // output chain of the new load.
9356         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9357                                       LdSplt.getValue(1));
9358         return LdSplt;
9359       }
9360     }
9361 
9362     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9363     // lowered to VSX instructions under certain conditions.
9364     // Without VSX, there is no pattern more efficient than expanding the node.
9365     if (Subtarget.hasVSX() &&
9366         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9367                                         Subtarget.hasP8Vector()))
9368       return Op;
9369     return SDValue();
9370   }
9371 
9372   uint64_t SplatBits = APSplatBits.getZExtValue();
9373   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9374   unsigned SplatSize = SplatBitSize / 8;
9375 
9376   // First, handle single instruction cases.
9377 
9378   // All zeros?
9379   if (SplatBits == 0) {
9380     // Canonicalize all zero vectors to be v4i32.
9381     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9382       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9383       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9384     }
9385     return Op;
9386   }
9387 
9388   // We have XXSPLTIW for constant splats four bytes wide.
  // Since the vector length in bytes is a multiple of 4, 2-byte splats can be
  // replaced with 4-byte splats. We replicate SplatBits in the 2-byte case to
  // make a 4-byte splat element. For example, a 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
9393   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9394     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9395                                   Op.getValueType(), DAG, dl);
9396 
9397   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9398     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9399                                   dl);
9400 
9401   // We have XXSPLTIB for constant splats one byte wide.
9402   if (Subtarget.hasP9Vector() && SplatSize == 1)
9403     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9404                                   dl);
9405 
9406   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9407   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9408                     (32-SplatBitSize));
9409   if (SextVal >= -16 && SextVal <= 15)
9410     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9411                                   dl);
9412 
9413   // Two instruction sequences.
9414 
9415   // If this value is in the range [-32,30] and is even, use:
9416   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9417   // If this value is in the range [17,31] and is odd, use:
9418   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9419   // If this value is in the range [-31,-17] and is odd, use:
9420   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9421   // Note the last two are three-instruction sequences.
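  // For example, 30 becomes VSPLTI[bhw](15) + VSPLTI[bhw](15), and 17 becomes
  // VSPLTI[bhw](1) - VSPLTI[bhw](-16).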
9422   if (SextVal >= -32 && SextVal <= 31) {
9423     // To avoid having these optimizations undone by constant folding,
9424     // we convert to a pseudo that will be expanded later into one of
9425     // the above forms.
9426     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9427     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9428               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9429     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9430     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9431     if (VT == Op.getValueType())
9432       return RetVal;
9433     else
9434       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9435   }
9436 
9437   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9438   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9439   // for fneg/fabs.
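  // For example, vspltisw -1 puts 0xFFFF_FFFF in each word; vslw by that same
  // vector shifts each word left by 31 (only the low 5 bits of the shift
  // amount are used), producing 0x8000_0000, and the final xor with the
  // all-ones vector gives 0x7FFF_FFFF.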
9440   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make a -1 splat with vspltisw -1:
9442     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9443 
9444     // Make the VSLW intrinsic, computing 0x8000_0000.
9445     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9446                                    OnesV, DAG, dl);
9447 
9448     // xor by OnesV to invert it.
9449     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9450     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9451   }
9452 
9453   // Check to see if this is a wide variety of vsplti*, binop self cases.
9454   static const signed char SplatCsts[] = {
9455     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9456     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9457   };
9458 
9459   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
9462     int i = SplatCsts[idx];
9463 
9464     // Figure out what shift amount will be used by altivec if shifted by i in
9465     // this splat size.
9466     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9467 
9468     // vsplti + shl self.
9469     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9470       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9471       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9472         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9473         Intrinsic::ppc_altivec_vslw
9474       };
9475       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9476       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9477     }
9478 
9479     // vsplti + srl self.
9480     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9481       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9482       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9483         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9484         Intrinsic::ppc_altivec_vsrw
9485       };
9486       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9487       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9488     }
9489 
    // vsplti + sra self.
    if (SextVal == ((int)i >> TypeShiftAmt)) {
9492       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9493       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9494         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9495         Intrinsic::ppc_altivec_vsraw
9496       };
9497       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9498       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9499     }
9500 
9501     // vsplti + rol self.
9502     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9503                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9504       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9505       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9506         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9507         Intrinsic::ppc_altivec_vrlw
9508       };
9509       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9510       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9511     }
9512 
9513     // t = vsplti c, result = vsldoi t, t, 1
9514     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9515       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9516       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9517       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9518     }
9519     // t = vsplti c, result = vsldoi t, t, 2
9520     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9521       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9522       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9523       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9524     }
9525     // t = vsplti c, result = vsldoi t, t, 3
9526     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9527       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9528       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9529       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9530     }
9531   }
9532 
9533   return SDValue();
9534 }
9535 
9536 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9537 /// the specified operations to build the shuffle.
9538 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9539                                       SDValue RHS, SelectionDAG &DAG,
9540                                       const SDLoc &dl) {
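  // Each PFEntry packs an operation number in bits 29-26 and two 13-bit
  // perfect-shuffle table indices (bits 25-13 and 12-0) for the LHS and RHS
  // sub-shuffles that feed that operation.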
9541   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9542   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9543   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9544 
9545   enum {
9546     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9547     OP_VMRGHW,
9548     OP_VMRGLW,
9549     OP_VSPLTISW0,
9550     OP_VSPLTISW1,
9551     OP_VSPLTISW2,
9552     OP_VSPLTISW3,
9553     OP_VSLDOI4,
9554     OP_VSLDOI8,
9555     OP_VSLDOI12
9556   };
9557 
9558   if (OpNum == OP_COPY) {
9559     if (LHSID == (1*9+2)*9+3) return LHS;
9560     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9561     return RHS;
9562   }
9563 
9564   SDValue OpLHS, OpRHS;
9565   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9566   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9567 
9568   int ShufIdxs[16];
9569   switch (OpNum) {
9570   default: llvm_unreachable("Unknown i32 permute!");
9571   case OP_VMRGHW:
9572     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9573     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9574     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9575     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9576     break;
9577   case OP_VMRGLW:
9578     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9579     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9580     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9581     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9582     break;
9583   case OP_VSPLTISW0:
9584     for (unsigned i = 0; i != 16; ++i)
9585       ShufIdxs[i] = (i&3)+0;
9586     break;
9587   case OP_VSPLTISW1:
9588     for (unsigned i = 0; i != 16; ++i)
9589       ShufIdxs[i] = (i&3)+4;
9590     break;
9591   case OP_VSPLTISW2:
9592     for (unsigned i = 0; i != 16; ++i)
9593       ShufIdxs[i] = (i&3)+8;
9594     break;
9595   case OP_VSPLTISW3:
9596     for (unsigned i = 0; i != 16; ++i)
9597       ShufIdxs[i] = (i&3)+12;
9598     break;
9599   case OP_VSLDOI4:
9600     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9601   case OP_VSLDOI8:
9602     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9603   case OP_VSLDOI12:
9604     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9605   }
9606   EVT VT = OpLHS.getValueType();
9607   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9608   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9609   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9610   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9611 }
9612 
9613 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9614 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9615 /// SDValue.
9616 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9617                                            SelectionDAG &DAG) const {
9618   const unsigned BytesInVector = 16;
9619   bool IsLE = Subtarget.isLittleEndian();
9620   SDLoc dl(N);
9621   SDValue V1 = N->getOperand(0);
9622   SDValue V2 = N->getOperand(1);
9623   unsigned ShiftElts = 0, InsertAtByte = 0;
9624   bool Swap = false;
9625 
9626   // Shifts required to get the byte we want at element 7.
9627   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9628                                    0, 15, 14, 13, 12, 11, 10, 9};
9629   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9630                                 1, 2,  3,  4,  5,  6,  7,  8};
9631 
9632   ArrayRef<int> Mask = N->getMask();
9633   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9634 
9635   // For each mask element, find out if we're just inserting something
9636   // from V2 into V1 or vice versa.
9637   // Possible permutations inserting an element from V2 into V1:
9638   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9639   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9640   //   ...
9641   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9642   // Inserting from V1 into V2 will be similar, except mask range will be
9643   // [16,31].
9644 
9645   bool FoundCandidate = false;
9646   // If both vector operands for the shuffle are the same vector, the mask
9647   // will contain only elements from the first one and the second one will be
9648   // undef.
9649   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved from
  // one vector to the other.
9652   for (unsigned i = 0; i < BytesInVector; ++i) {
9653     unsigned CurrentElement = Mask[i];
9654     // If 2nd operand is undefined, we should only look for element 7 in the
9655     // Mask.
9656     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9657       continue;
9658 
9659     bool OtherElementsInOrder = true;
9660     // Examine the other elements in the Mask to see if they're in original
9661     // order.
9662     for (unsigned j = 0; j < BytesInVector; ++j) {
9663       if (j == i)
9664         continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
      // undefined, in which case we assume we're always picking from the 1st
      // operand.
9668       int MaskOffset =
9669           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9670       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9671         OtherElementsInOrder = false;
9672         break;
9673       }
9674     }
9675     // If other elements are in original order, we record the number of shifts
9676     // we need to get the element we want into element 7. Also record which byte
9677     // in the vector we should insert into.
9678     if (OtherElementsInOrder) {
9679       // If 2nd operand is undefined, we assume no shifts and no swapping.
9680       if (V2.isUndef()) {
9681         ShiftElts = 0;
9682         Swap = false;
9683       } else {
        // Only need the last 4 bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
9685         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9686                          : BigEndianShifts[CurrentElement & 0xF];
9687         Swap = CurrentElement < BytesInVector;
9688       }
9689       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9690       FoundCandidate = true;
9691       break;
9692     }
9693   }
9694 
9695   if (!FoundCandidate)
9696     return SDValue();
9697 
9698   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9699   // optionally with VECSHL if shift is required.
9700   if (Swap)
9701     std::swap(V1, V2);
9702   if (V2.isUndef())
9703     V2 = V1;
9704   if (ShiftElts) {
9705     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9706                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9707     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9708                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9709   }
9710   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9711                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9712 }
9713 
9714 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9715 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9716 /// SDValue.
9717 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9718                                            SelectionDAG &DAG) const {
9719   const unsigned NumHalfWords = 8;
9720   const unsigned BytesInVector = NumHalfWords * 2;
9721   // Check that the shuffle is on half-words.
9722   if (!isNByteElemShuffleMask(N, 2, 1))
9723     return SDValue();
9724 
9725   bool IsLE = Subtarget.isLittleEndian();
9726   SDLoc dl(N);
9727   SDValue V1 = N->getOperand(0);
9728   SDValue V2 = N->getOperand(1);
9729   unsigned ShiftElts = 0, InsertAtByte = 0;
9730   bool Swap = false;
9731 
9732   // Shifts required to get the half-word we want at element 3.
9733   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9734   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9735 
9736   uint32_t Mask = 0;
9737   uint32_t OriginalOrderLow = 0x1234567;
9738   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9739   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9740   // 32-bit space, only need 4-bit nibbles per element.
9741   for (unsigned i = 0; i < NumHalfWords; ++i) {
9742     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9743     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9744   }
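  // For example, an identity shuffle (byte mask <0, 1, ..., 15>) packs to
  // 0x01234567, matching OriginalOrderLow.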
9745 
9746   // For each mask element, find out if we're just inserting something
9747   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9748   // from V2 into V1:
9749   //   X, 1, 2, 3, 4, 5, 6, 7
9750   //   0, X, 2, 3, 4, 5, 6, 7
9751   //   0, 1, X, 3, 4, 5, 6, 7
9752   //   0, 1, 2, X, 4, 5, 6, 7
9753   //   0, 1, 2, 3, X, 5, 6, 7
9754   //   0, 1, 2, 3, 4, X, 6, 7
9755   //   0, 1, 2, 3, 4, 5, X, 7
9756   //   0, 1, 2, 3, 4, 5, 6, X
9757   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9758 
9759   bool FoundCandidate = false;
9760   // Go through the mask of half-words to find an element that's being moved
9761   // from one vector to the other.
9762   for (unsigned i = 0; i < NumHalfWords; ++i) {
9763     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9764     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9765     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9766     uint32_t TargetOrder = 0x0;
9767 
9768     // If both vector operands for the shuffle are the same vector, the mask
9769     // will contain only elements from the first one and the second one will be
9770     // undef.
9771     if (V2.isUndef()) {
9772       ShiftElts = 0;
9773       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9774       TargetOrder = OriginalOrderLow;
9775       Swap = false;
      // Skip if this is not the correct element or if the mask of the other
      // elements doesn't match our expected order.
9778       if (MaskOneElt == VINSERTHSrcElem &&
9779           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9780         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9781         FoundCandidate = true;
9782         break;
9783       }
9784     } else { // If both operands are defined.
9785       // Target order is [8,15] if the current mask is between [0,7].
9786       TargetOrder =
9787           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if the mask of the other elements doesn't match our expected
      // order.
9789       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9790         // We only need the last 3 bits for the number of shifts.
9791         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9792                          : BigEndianShifts[MaskOneElt & 0x7];
9793         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9794         Swap = MaskOneElt < NumHalfWords;
9795         FoundCandidate = true;
9796         break;
9797       }
9798     }
9799   }
9800 
9801   if (!FoundCandidate)
9802     return SDValue();
9803 
9804   // Candidate found, construct the proper SDAG sequence with VINSERTH,
9805   // optionally with VECSHL if shift is required.
9806   if (Swap)
9807     std::swap(V1, V2);
9808   if (V2.isUndef())
9809     V2 = V1;
9810   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9811   if (ShiftElts) {
9812     // Double ShiftElts because we're left shifting on v16i8 type.
9813     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9814                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9815     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9816     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9817                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9818     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9819   }
9820   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9821   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9822                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
9823   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9824 }
9825 
9826 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9827 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9828 /// return the default SDValue.
9829 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9830                                               SelectionDAG &DAG) const {
9831   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9832   // to v16i8. Peek through the bitcasts to get the actual operands.
9833   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9834   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9835 
9836   auto ShuffleMask = SVN->getMask();
9837   SDValue VecShuffle(SVN, 0);
9838   SDLoc DL(SVN);
9839 
9840   // Check that we have a four byte shuffle.
9841   if (!isNByteElemShuffleMask(SVN, 4, 1))
9842     return SDValue();
9843 
9844   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9845   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9846     std::swap(LHS, RHS);
9847     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9848     ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9849   }
9850 
9851   // Ensure that the RHS is a vector of constants.
9852   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9853   if (!BVN)
9854     return SDValue();
9855 
9856   // Check if RHS is a splat of 4-bytes (or smaller).
9857   APInt APSplatValue, APSplatUndef;
9858   unsigned SplatBitSize;
9859   bool HasAnyUndefs;
9860   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9861                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9862       SplatBitSize > 32)
9863     return SDValue();
9864 
9865   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9866   // The instruction splats a constant C into two words of the source vector
  // producing { Unchanged, C, Unchanged, C } or { C, Unchanged, C, Unchanged }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9870   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9871   // within each word are consecutive, so we only need to check the first byte.
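  // For example, the mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 20,21,22,23>
  // keeps words 0 and 2 of the LHS and takes words 1 and 3 from the splat
  // constant (the first form above).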
9872   SDValue Index;
9873   bool IsLE = Subtarget.isLittleEndian();
9874   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9875       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9876        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9877     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9878   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9879            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9880             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9881     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9882   else
9883     return SDValue();
9884 
9885   // If the splat is narrower than 32-bits, we need to get the 32-bit value
9886   // for XXSPLTI32DX.
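  // For example, an 8-bit splat value of 0xAB is widened to 0xABAB and then
  // to 0xABABABAB.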
9887   unsigned SplatVal = APSplatValue.getZExtValue();
9888   for (; SplatBitSize < 32; SplatBitSize <<= 1)
9889     SplatVal |= (SplatVal << SplatBitSize);
9890 
9891   SDValue SplatNode = DAG.getNode(
9892       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9893       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9894   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9895 }
9896 
9897 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if the shift amount is
/// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
/// i.e. (or (shl x, C1), (srl x, 128-C1)).
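/// For example, for a shift amount of 16 the byte-shuffle mask is simply the
/// identity rotated by two elements, i.e. <2, 3, ..., 15, 0, 1>.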
9901 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9902   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9903   assert(Op.getValueType() == MVT::v1i128 &&
9904          "Only set v1i128 as custom, other type shouldn't reach here!");
9905   SDLoc dl(Op);
9906   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9907   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9908   unsigned SHLAmt = N1.getConstantOperandVal(0);
9909   if (SHLAmt % 8 == 0) {
9910     SmallVector<int, 16> Mask(16, 0);
9911     std::iota(Mask.begin(), Mask.end(), 0);
9912     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9913     if (SDValue Shuffle =
9914             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9915                                  DAG.getUNDEF(MVT::v16i8), Mask))
9916       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9917   }
9918   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9919   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9920                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9921   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9922                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9923   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9924   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9925 }
9926 
9927 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9928 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9929 /// return the code it can be lowered into.  Worst case, it can always be
9930 /// lowered into a vperm.
9931 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9932                                                SelectionDAG &DAG) const {
9933   SDLoc dl(Op);
9934   SDValue V1 = Op.getOperand(0);
9935   SDValue V2 = Op.getOperand(1);
9936   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9937 
9938   // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine them here.
9941   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9942     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9943       return NewShuffle;
9944     Op = NewShuffle;
9945     SVOp = cast<ShuffleVectorSDNode>(Op);
9946     V1 = Op.getOperand(0);
9947     V2 = Op.getOperand(1);
9948   }
9949   EVT VT = Op.getValueType();
9950   bool isLittleEndian = Subtarget.isLittleEndian();
9951 
9952   unsigned ShiftElts, InsertAtByte;
9953   bool Swap = false;
9954 
9955   // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However, if the load has multiple uses, we don't want to
9957   // combine it because that will just produce multiple loads.
9958   bool IsPermutedLoad = false;
9959   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9960   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9961       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9962       InputLoad->hasOneUse()) {
9963     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9964     int SplatIdx =
9965       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9966 
9967     // The splat index for permuted loads will be in the left half of the vector
9968     // which is strictly wider than the loaded value by 8 bytes. So we need to
9969     // adjust the splat index to point to the correct address in memory.
9970     if (IsPermutedLoad) {
9971       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9972       SplatIdx += IsFourByte ? 2 : 1;
9973       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9974              "Splat of a value outside of the loaded memory");
9975     }
9976 
9977     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9978     // For 4-byte load-and-splat, we need Power9.
9979     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
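      // Convert the splat element index to a byte offset from the load's base
      // pointer. Element numbering is reversed on little endian: for example,
      // word element 1 is at byte offset 4 on big endian but at byte offset 8
      // on little endian.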
9980       uint64_t Offset = 0;
9981       if (IsFourByte)
9982         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9983       else
9984         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9985 
9986       SDValue BasePtr = LD->getBasePtr();
9987       if (Offset != 0)
9988         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9989                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9990       SDValue Ops[] = {
9991         LD->getChain(),    // Chain
9992         BasePtr,           // BasePtr
9993         DAG.getValueType(Op.getValueType()) // VT
9994       };
9995       SDVTList VTL =
9996         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9997       SDValue LdSplt =
9998         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9999                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
10000       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10001       if (LdSplt.getValueType() != SVOp->getValueType(0))
10002         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10003       return LdSplt;
10004     }
10005   }
10006   if (Subtarget.hasP9Vector() &&
10007       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10008                            isLittleEndian)) {
10009     if (Swap)
10010       std::swap(V1, V2);
10011     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10012     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10013     if (ShiftElts) {
10014       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10015                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
10016       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10017                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10018       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10019     }
10020     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10021                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10022     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10023   }
10024 
10025   if (Subtarget.hasPrefixInstrs()) {
10026     SDValue SplatInsertNode;
10027     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10028       return SplatInsertNode;
10029   }
10030 
10031   if (Subtarget.hasP9Altivec()) {
10032     SDValue NewISDNode;
10033     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10034       return NewISDNode;
10035 
10036     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10037       return NewISDNode;
10038   }
10039 
10040   if (Subtarget.hasVSX() &&
10041       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10042     if (Swap)
10043       std::swap(V1, V2);
10044     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10045     SDValue Conv2 =
10046         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10047 
10048     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10049                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10050     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10051   }
10052 
10053   if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10055     if (Swap)
10056       std::swap(V1, V2);
10057     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10058     SDValue Conv2 =
10059         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10060 
10061     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10062                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10063     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10064   }
10065 
10066   if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
10068       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10069       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10070       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10071     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10072       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10073       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10074       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10075     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10076       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10077       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10078       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10079     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10080       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10081       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10082       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10083     }
10084   }
10085 
10086   if (Subtarget.hasVSX()) {
10087     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10088       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10089 
10090       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10091       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10092                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
10093       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10094     }
10095 
10096     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10097     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10098       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10099       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10100       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10101     }
10102   }
10103 
10104   // Cases that are handled by instructions that take permute immediates
10105   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10106   // selected by the instruction selector.
10107   if (V2.isUndef()) {
10108     if (PPC::isSplatShuffleMask(SVOp, 1) ||
10109         PPC::isSplatShuffleMask(SVOp, 2) ||
10110         PPC::isSplatShuffleMask(SVOp, 4) ||
10111         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10112         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10113         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10114         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10115         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10116         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10117         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10118         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10119         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10120         (Subtarget.hasP8Altivec() && (
10121          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10122          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10123          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10124       return Op;
10125     }
10126   }
10127 
10128   // Altivec has a variety of "shuffle immediates" that take two vector inputs
10129   // and produce a fixed permutation.  If any of these match, do not lower to
10130   // VPERM.
10131   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10132   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10133       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10134       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10135       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10136       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10137       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10138       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10139       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10140       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10141       (Subtarget.hasP8Altivec() && (
10142        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10143        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10144        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10145     return Op;
10146 
10147   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10148   // perfect shuffle table to emit an optimal matching sequence.
10149   ArrayRef<int> PermMask = SVOp->getMask();
10150 
10151   unsigned PFIndexes[4];
10152   bool isFourElementShuffle = true;
10153   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10154     unsigned EltNo = 8;   // Start out undef.
10155     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
10156       if (PermMask[i*4+j] < 0)
10157         continue;   // Undef, ignore it.
10158 
10159       unsigned ByteSource = PermMask[i*4+j];
10160       if ((ByteSource & 3) != j) {
10161         isFourElementShuffle = false;
10162         break;
10163       }
10164 
10165       if (EltNo == 8) {
10166         EltNo = ByteSource/4;
10167       } else if (EltNo != ByteSource/4) {
10168         isFourElementShuffle = false;
10169         break;
10170       }
10171     }
10172     PFIndexes[i] = EltNo;
10173   }
10174 
10175   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10176   // perfect shuffle vector to determine if it is cost effective to do this as
10177   // discrete instructions, or whether we should use a vperm.
10178   // For now, we skip this for little endian until such time as we have a
10179   // little-endian perfect shuffle table.
10180   if (isFourElementShuffle && !isLittleEndian) {
10181     // Compute the index in the perfect shuffle table.
10182     unsigned PFTableIndex =
10183       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10184 
10185     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10186     unsigned Cost  = (PFEntry >> 30);
10187 
10188     // Determining when to avoid vperm is tricky.  Many things affect the cost
10189     // of vperm, particularly how many times the perm mask needs to be computed.
10190     // For example, if the perm mask can be hoisted out of a loop or is already
10191     // used (perhaps because there are multiple permutes with the same shuffle
10192     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
10193     // the loop requires an extra register.
10194     //
10195     // As a compromise, we only emit discrete instructions if the shuffle can be
10196     // generated in 3 or fewer operations.  When we have loop information
10197     // available, if this block is within a loop, we should avoid using vperm
10198     // for 3-operation perms and use a constant pool load instead.
10199     if (Cost < 3)
10200       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10201   }
10202 
10203   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10204   // vector that will get spilled to the constant pool.
10205   if (V2.isUndef()) V2 = V1;
10206 
  // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
10208   // that it is in input element units, not in bytes.  Convert now.
10209 
10210   // For little endian, the order of the input vectors is reversed, and
10211   // the permutation mask is complemented with respect to 31.  This is
10212   // necessary to produce proper semantics with the big-endian-biased vperm
10213   // instruction.
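  // For a v16i8 shuffle, for example, placing byte 5 of V1 into result byte 0
  // on little endian uses the control value 31 - 5 = 26: with the swapped
  // operand order (V2, V1), big-endian byte 26 of the 32-byte concatenation
  // is exactly little-endian byte 5 of V1.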
10214   EVT EltVT = V1.getValueType().getVectorElementType();
10215   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10216 
10217   SmallVector<SDValue, 16> ResultMask;
10218   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10219     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10220 
10221     for (unsigned j = 0; j != BytesPerElement; ++j)
10222       if (isLittleEndian)
10223         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10224                                              dl, MVT::i32));
10225       else
10226         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10227                                              MVT::i32));
10228   }
10229 
10230   ShufflesHandledWithVPERM++;
10231   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10232   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10233   LLVM_DEBUG(SVOp->dump());
10234   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10235   LLVM_DEBUG(VPermMask.dump());
10236 
10237   if (isLittleEndian)
10238     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10239                        V2, V1, VPermMask);
10240   else
10241     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10242                        V1, V2, VPermMask);
10243 }
10244 
10245 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10246 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10247 /// information about the intrinsic.
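/// The CompareOpc values used below match the extended-opcode fields of the
/// corresponding compare instructions (for example, 6 for vcmpequb).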
10248 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10249                                  bool &isDot, const PPCSubtarget &Subtarget) {
10250   unsigned IntrinsicID =
10251       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10252   CompareOpc = -1;
10253   isDot = false;
10254   switch (IntrinsicID) {
10255   default:
10256     return false;
10257   // Comparison predicates.
10258   case Intrinsic::ppc_altivec_vcmpbfp_p:
10259     CompareOpc = 966;
10260     isDot = true;
10261     break;
10262   case Intrinsic::ppc_altivec_vcmpeqfp_p:
10263     CompareOpc = 198;
10264     isDot = true;
10265     break;
10266   case Intrinsic::ppc_altivec_vcmpequb_p:
10267     CompareOpc = 6;
10268     isDot = true;
10269     break;
10270   case Intrinsic::ppc_altivec_vcmpequh_p:
10271     CompareOpc = 70;
10272     isDot = true;
10273     break;
10274   case Intrinsic::ppc_altivec_vcmpequw_p:
10275     CompareOpc = 134;
10276     isDot = true;
10277     break;
10278   case Intrinsic::ppc_altivec_vcmpequd_p:
10279     if (Subtarget.hasP8Altivec()) {
10280       CompareOpc = 199;
10281       isDot = true;
10282     } else
10283       return false;
10284     break;
10285   case Intrinsic::ppc_altivec_vcmpneb_p:
10286   case Intrinsic::ppc_altivec_vcmpneh_p:
10287   case Intrinsic::ppc_altivec_vcmpnew_p:
10288   case Intrinsic::ppc_altivec_vcmpnezb_p:
10289   case Intrinsic::ppc_altivec_vcmpnezh_p:
10290   case Intrinsic::ppc_altivec_vcmpnezw_p:
10291     if (Subtarget.hasP9Altivec()) {
10292       switch (IntrinsicID) {
10293       default:
10294         llvm_unreachable("Unknown comparison intrinsic.");
10295       case Intrinsic::ppc_altivec_vcmpneb_p:
10296         CompareOpc = 7;
10297         break;
10298       case Intrinsic::ppc_altivec_vcmpneh_p:
10299         CompareOpc = 71;
10300         break;
10301       case Intrinsic::ppc_altivec_vcmpnew_p:
10302         CompareOpc = 135;
10303         break;
10304       case Intrinsic::ppc_altivec_vcmpnezb_p:
10305         CompareOpc = 263;
10306         break;
10307       case Intrinsic::ppc_altivec_vcmpnezh_p:
10308         CompareOpc = 327;
10309         break;
10310       case Intrinsic::ppc_altivec_vcmpnezw_p:
10311         CompareOpc = 391;
10312         break;
10313       }
10314       isDot = true;
10315     } else
10316       return false;
10317     break;
10318   case Intrinsic::ppc_altivec_vcmpgefp_p:
10319     CompareOpc = 454;
10320     isDot = true;
10321     break;
10322   case Intrinsic::ppc_altivec_vcmpgtfp_p:
10323     CompareOpc = 710;
10324     isDot = true;
10325     break;
10326   case Intrinsic::ppc_altivec_vcmpgtsb_p:
10327     CompareOpc = 774;
10328     isDot = true;
10329     break;
10330   case Intrinsic::ppc_altivec_vcmpgtsh_p:
10331     CompareOpc = 838;
10332     isDot = true;
10333     break;
10334   case Intrinsic::ppc_altivec_vcmpgtsw_p:
10335     CompareOpc = 902;
10336     isDot = true;
10337     break;
10338   case Intrinsic::ppc_altivec_vcmpgtsd_p:
10339     if (Subtarget.hasP8Altivec()) {
10340       CompareOpc = 967;
10341       isDot = true;
10342     } else
10343       return false;
10344     break;
10345   case Intrinsic::ppc_altivec_vcmpgtub_p:
10346     CompareOpc = 518;
10347     isDot = true;
10348     break;
10349   case Intrinsic::ppc_altivec_vcmpgtuh_p:
10350     CompareOpc = 582;
10351     isDot = true;
10352     break;
10353   case Intrinsic::ppc_altivec_vcmpgtuw_p:
10354     CompareOpc = 646;
10355     isDot = true;
10356     break;
10357   case Intrinsic::ppc_altivec_vcmpgtud_p:
10358     if (Subtarget.hasP8Altivec()) {
10359       CompareOpc = 711;
10360       isDot = true;
10361     } else
10362       return false;
10363     break;
10364 
10365   case Intrinsic::ppc_altivec_vcmpequq:
10366   case Intrinsic::ppc_altivec_vcmpgtsq:
10367   case Intrinsic::ppc_altivec_vcmpgtuq:
10368     if (!Subtarget.isISA3_1())
10369       return false;
10370     switch (IntrinsicID) {
10371     default:
10372       llvm_unreachable("Unknown comparison intrinsic.");
10373     case Intrinsic::ppc_altivec_vcmpequq:
10374       CompareOpc = 455;
10375       break;
10376     case Intrinsic::ppc_altivec_vcmpgtsq:
10377       CompareOpc = 903;
10378       break;
10379     case Intrinsic::ppc_altivec_vcmpgtuq:
10380       CompareOpc = 647;
10381       break;
10382     }
10383     break;
10384 
  // VSX predicate comparisons use the same infrastructure.
10386   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10387   case Intrinsic::ppc_vsx_xvcmpgedp_p:
10388   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10389   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10390   case Intrinsic::ppc_vsx_xvcmpgesp_p:
10391   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10392     if (Subtarget.hasVSX()) {
10393       switch (IntrinsicID) {
10394       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10395         CompareOpc = 99;
10396         break;
10397       case Intrinsic::ppc_vsx_xvcmpgedp_p:
10398         CompareOpc = 115;
10399         break;
10400       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10401         CompareOpc = 107;
10402         break;
10403       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10404         CompareOpc = 67;
10405         break;
10406       case Intrinsic::ppc_vsx_xvcmpgesp_p:
10407         CompareOpc = 83;
10408         break;
10409       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10410         CompareOpc = 75;
10411         break;
10412       }
10413       isDot = true;
10414     } else
10415       return false;
10416     break;
10417 
10418   // Normal Comparisons.
10419   case Intrinsic::ppc_altivec_vcmpbfp:
10420     CompareOpc = 966;
10421     break;
10422   case Intrinsic::ppc_altivec_vcmpeqfp:
10423     CompareOpc = 198;
10424     break;
10425   case Intrinsic::ppc_altivec_vcmpequb:
10426     CompareOpc = 6;
10427     break;
10428   case Intrinsic::ppc_altivec_vcmpequh:
10429     CompareOpc = 70;
10430     break;
10431   case Intrinsic::ppc_altivec_vcmpequw:
10432     CompareOpc = 134;
10433     break;
10434   case Intrinsic::ppc_altivec_vcmpequd:
10435     if (Subtarget.hasP8Altivec())
10436       CompareOpc = 199;
10437     else
10438       return false;
10439     break;
10440   case Intrinsic::ppc_altivec_vcmpneb:
10441   case Intrinsic::ppc_altivec_vcmpneh:
10442   case Intrinsic::ppc_altivec_vcmpnew:
10443   case Intrinsic::ppc_altivec_vcmpnezb:
10444   case Intrinsic::ppc_altivec_vcmpnezh:
10445   case Intrinsic::ppc_altivec_vcmpnezw:
10446     if (Subtarget.hasP9Altivec())
10447       switch (IntrinsicID) {
10448       default:
10449         llvm_unreachable("Unknown comparison intrinsic.");
10450       case Intrinsic::ppc_altivec_vcmpneb:
10451         CompareOpc = 7;
10452         break;
10453       case Intrinsic::ppc_altivec_vcmpneh:
10454         CompareOpc = 71;
10455         break;
10456       case Intrinsic::ppc_altivec_vcmpnew:
10457         CompareOpc = 135;
10458         break;
10459       case Intrinsic::ppc_altivec_vcmpnezb:
10460         CompareOpc = 263;
10461         break;
10462       case Intrinsic::ppc_altivec_vcmpnezh:
10463         CompareOpc = 327;
10464         break;
10465       case Intrinsic::ppc_altivec_vcmpnezw:
10466         CompareOpc = 391;
10467         break;
10468       }
10469     else
10470       return false;
10471     break;
10472   case Intrinsic::ppc_altivec_vcmpgefp:
10473     CompareOpc = 454;
10474     break;
10475   case Intrinsic::ppc_altivec_vcmpgtfp:
10476     CompareOpc = 710;
10477     break;
10478   case Intrinsic::ppc_altivec_vcmpgtsb:
10479     CompareOpc = 774;
10480     break;
10481   case Intrinsic::ppc_altivec_vcmpgtsh:
10482     CompareOpc = 838;
10483     break;
10484   case Intrinsic::ppc_altivec_vcmpgtsw:
10485     CompareOpc = 902;
10486     break;
10487   case Intrinsic::ppc_altivec_vcmpgtsd:
10488     if (Subtarget.hasP8Altivec())
10489       CompareOpc = 967;
10490     else
10491       return false;
10492     break;
10493   case Intrinsic::ppc_altivec_vcmpgtub:
10494     CompareOpc = 518;
10495     break;
10496   case Intrinsic::ppc_altivec_vcmpgtuh:
10497     CompareOpc = 582;
10498     break;
10499   case Intrinsic::ppc_altivec_vcmpgtuw:
10500     CompareOpc = 646;
10501     break;
10502   case Intrinsic::ppc_altivec_vcmpgtud:
10503     if (Subtarget.hasP8Altivec())
10504       CompareOpc = 711;
10505     else
10506       return false;
10507     break;
10508   case Intrinsic::ppc_altivec_vcmpequq_p:
10509   case Intrinsic::ppc_altivec_vcmpgtsq_p:
10510   case Intrinsic::ppc_altivec_vcmpgtuq_p:
10511     if (!Subtarget.isISA3_1())
10512       return false;
10513     switch (IntrinsicID) {
10514     default:
10515       llvm_unreachable("Unknown comparison intrinsic.");
10516     case Intrinsic::ppc_altivec_vcmpequq_p:
10517       CompareOpc = 455;
10518       break;
10519     case Intrinsic::ppc_altivec_vcmpgtsq_p:
10520       CompareOpc = 903;
10521       break;
10522     case Intrinsic::ppc_altivec_vcmpgtuq_p:
10523       CompareOpc = 647;
10524       break;
10525     }
10526     isDot = true;
10527     break;
10528   }
10529   return true;
10530 }
10531 
10532 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10533 /// lower, do it, otherwise return null.
10534 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10535                                                    SelectionDAG &DAG) const {
10536   unsigned IntrinsicID =
10537     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10538 
10539   SDLoc dl(Op);
10540 
10541   switch (IntrinsicID) {
10542   case Intrinsic::thread_pointer:
10543     // Reads the thread pointer register, used for __builtin_thread_pointer.
10544     if (Subtarget.isPPC64())
10545       return DAG.getRegister(PPC::X13, MVT::i64);
10546     return DAG.getRegister(PPC::R2, MVT::i32);
10547 
10548   case Intrinsic::ppc_mma_disassemble_acc:
10549   case Intrinsic::ppc_mma_disassemble_pair: {
10550     int NumVecs = 2;
10551     SDValue WideVec = Op.getOperand(1);
10552     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10553       NumVecs = 4;
10554       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10555     }
10556     SmallVector<SDValue, 4> RetOps;
10557     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10558       SDValue Extract = DAG.getNode(
10559           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10560           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10561                                                      : VecNo,
10562                           dl, MVT::i64));
10563       RetOps.push_back(Extract);
10564     }
10565     return DAG.getMergeValues(RetOps, dl);
10566   }
10567   }
10568 
10569   // If this is a lowered altivec predicate compare, CompareOpc is set to the
10570   // opcode number of the comparison.
10571   int CompareOpc;
10572   bool isDot;
10573   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10574     return SDValue();    // Don't custom lower most intrinsics.
10575 
10576   // If this is a non-dot comparison, make the VCMP node and we are done.
10577   if (!isDot) {
10578     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10579                               Op.getOperand(1), Op.getOperand(2),
10580                               DAG.getConstant(CompareOpc, dl, MVT::i32));
10581     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10582   }
10583 
10584   // Create the PPCISD altivec 'dot' comparison node.
10585   SDValue Ops[] = {
10586     Op.getOperand(2),  // LHS
10587     Op.getOperand(3),  // RHS
10588     DAG.getConstant(CompareOpc, dl, MVT::i32)
10589   };
10590   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10591   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10592 
10593   // Now that we have the comparison, emit a copy from the CR to a GPR.
10594   // This is flagged to the above dot comparison.
10595   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10596                                 DAG.getRegister(PPC::CR6, MVT::i32),
10597                                 CompNode.getValue(1));
10598 
10599   // Unpack the result based on how the target uses it.
10600   unsigned BitNo;   // Bit # of CR6.
10601   bool InvertBit;   // Invert result?
10602   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10603   default:  // Can't happen, don't crash on invalid number though.
10604   case 0:   // Return the value of the EQ bit of CR6.
10605     BitNo = 0; InvertBit = false;
10606     break;
10607   case 1:   // Return the inverted value of the EQ bit of CR6.
10608     BitNo = 0; InvertBit = true;
10609     break;
10610   case 2:   // Return the value of the LT bit of CR6.
10611     BitNo = 2; InvertBit = false;
10612     break;
10613   case 3:   // Return the inverted value of the LT bit of CR6.
10614     BitNo = 2; InvertBit = true;
10615     break;
10616   }
10617 
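  // After MFOCRF, the four bits of CR6 occupy bits 7..4 of the GPR value
  // (LT, GT, EQ, SO from most to least significant), so the bit we want is at
  // position 5 + BitNo: EQ (BitNo == 0) at bit 5, LT (BitNo == 2) at bit 7.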
10618   // Shift the bit into the low position.
10619   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10620                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10621   // Isolate the bit.
10622   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10623                       DAG.getConstant(1, dl, MVT::i32));
10624 
10625   // If we are supposed to, toggle the bit.
10626   if (InvertBit)
10627     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10628                         DAG.getConstant(1, dl, MVT::i32));
10629   return Flags;
10630 }
10631 
10632 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10633                                                SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain
  // operand at the beginning of the argument list.
10636   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10637   SDLoc DL(Op);
10638   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10639   case Intrinsic::ppc_cfence: {
10640     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10641     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10642     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10643                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10644                                                   Op.getOperand(ArgStart + 1)),
10645                                       Op.getOperand(0)),
10646                    0);
10647   }
10648   default:
10649     break;
10650   }
10651   return SDValue();
10652 }
10653 
10654 // Lower scalar BSWAP64 to xxbrd.
10655 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10656   SDLoc dl(Op);
10657   // MTVSRDD
10658   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10659                    Op.getOperand(0));
10660   // XXBRD
10661   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10662   // MFVSRD
10663   int VectorIndex = 0;
10664   if (Subtarget.isLittleEndian())
10665     VectorIndex = 1;
10666   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10667                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10668   return Op;
10669 }
10670 
10671 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10672 // compared to a value that is atomically loaded (atomic loads zero-extend).
10673 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10674                                                 SelectionDAG &DAG) const {
10675   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10676          "Expecting an atomic compare-and-swap here.");
10677   SDLoc dl(Op);
10678   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10679   EVT MemVT = AtomicNode->getMemoryVT();
10680   if (MemVT.getSizeInBits() >= 32)
10681     return Op;
10682 
10683   SDValue CmpOp = Op.getOperand(2);
10684   // If this is already correctly zero-extended, leave it alone.
10685   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10686   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10687     return Op;
10688 
10689   // Clear the high bits of the compare operand.
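  // (The mask is 0xFF for i8 and 0xFFFF for i16 operations.)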
10690   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10691   SDValue NewCmpOp =
10692     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10693                 DAG.getConstant(MaskVal, dl, MVT::i32));
10694 
10695   // Replace the existing compare operand with the properly zero-extended one.
10696   SmallVector<SDValue, 4> Ops;
10697   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10698     Ops.push_back(AtomicNode->getOperand(i));
10699   Ops[2] = NewCmpOp;
10700   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10701   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10702   auto NodeTy =
10703     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10704   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10705 }
10706 
10707 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10708                                                  SelectionDAG &DAG) const {
10709   SDLoc dl(Op);
10710   // Create a stack slot that is 16-byte aligned.
10711   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10712   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10713   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10714   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10715 
10716   // Store the input value into Value#0 of the stack slot.
10717   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10718                                MachinePointerInfo());
10719   // Load it out.
10720   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10721 }
10722 
10723 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10724                                                   SelectionDAG &DAG) const {
10725   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10726          "Should only be called for ISD::INSERT_VECTOR_ELT");
10727 
10728   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10729   // We have legal lowering for constant indices but not for variable ones.
10730   if (!C)
10731     return SDValue();
10732 
10733   EVT VT = Op.getValueType();
10734   SDLoc dl(Op);
10735   SDValue V1 = Op.getOperand(0);
10736   SDValue V2 = Op.getOperand(1);
10737   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10738   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10739     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10740     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10741     unsigned InsertAtElement = C->getZExtValue();
10742     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10743     if (Subtarget.isLittleEndian()) {
10744       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10745     }
10746     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10747                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10748   }
10749   return Op;
10750 }
10751 
10752 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10753                                            SelectionDAG &DAG) const {
10754   SDLoc dl(Op);
10755   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10756   SDValue LoadChain = LN->getChain();
10757   SDValue BasePtr = LN->getBasePtr();
10758   EVT VT = Op.getValueType();
10759 
10760   if (VT != MVT::v256i1 && VT != MVT::v512i1)
10761     return Op;
10762 
10763   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value
  // into 2 or 4 VSX registers.
10766   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10767          "Type unsupported without MMA");
10768   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10769          "Type unsupported without paired vector support");
10770   Align Alignment = LN->getAlign();
10771   SmallVector<SDValue, 4> Loads;
10772   SmallVector<SDValue, 4> LoadChains;
10773   unsigned NumVecs = VT.getSizeInBits() / 128;
10774   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10775     SDValue Load =
10776         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10777                     LN->getPointerInfo().getWithOffset(Idx * 16),
10778                     commonAlignment(Alignment, Idx * 16),
10779                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
10780     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10781                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10782     Loads.push_back(Load);
10783     LoadChains.push_back(Load.getValue(1));
10784   }
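  // On little endian, the underlying VSX registers of a pair or accumulator
  // are combined in the opposite order, so reverse the loaded values (and
  // their chains) before building the wide value.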
10785   if (Subtarget.isLittleEndian()) {
10786     std::reverse(Loads.begin(), Loads.end());
10787     std::reverse(LoadChains.begin(), LoadChains.end());
10788   }
10789   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10790   SDValue Value =
10791       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10792                   dl, VT, Loads);
10793   SDValue RetOps[] = {Value, TF};
10794   return DAG.getMergeValues(RetOps, dl);
10795 }
10796 
10797 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10798                                             SelectionDAG &DAG) const {
10799   SDLoc dl(Op);
10800   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10801   SDValue StoreChain = SN->getChain();
10802   SDValue BasePtr = SN->getBasePtr();
10803   SDValue Value = SN->getValue();
10804   EVT StoreVT = Value.getValueType();
10805 
10806   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10807     return Op;
10808 
10809   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  // Here we create 2 or 4 v16i8 stores to store the underlying VSX registers
  // of the pair or accumulator individually.
10812   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10813          "Type unsupported without MMA");
10814   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10815          "Type unsupported without paired vector support");
10816   Align Alignment = SN->getAlign();
10817   SmallVector<SDValue, 4> Stores;
10818   unsigned NumVecs = 2;
10819   if (StoreVT == MVT::v512i1) {
10820     Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10821     NumVecs = 4;
10822   }
10823   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10824     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10825     SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10826                               DAG.getConstant(VecNum, dl, MVT::i64));
10827     SDValue Store =
10828         DAG.getStore(StoreChain, dl, Elt, BasePtr,
10829                      SN->getPointerInfo().getWithOffset(Idx * 16),
10830                      commonAlignment(Alignment, Idx * 16),
10831                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
10832     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10833                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10834     Stores.push_back(Store);
10835   }
10836   SDValue TF = DAG.getTokenFactor(dl, Stores);
10837   return TF;
10838 }
10839 
10840 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10841   SDLoc dl(Op);
10842   if (Op.getValueType() == MVT::v4i32) {
10843     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10844 
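    // The low 32 bits of each 32 x 32 bit product are computed from 16-bit
    // halves as aL*bL + ((aL*bH + aH*bL) << 16): vmulouh produces the aL*bL
    // terms, vmsumuhm with the halfword-swapped RHS produces aL*bH + aH*bL,
    // and vslw shifts that sum left by 16 before the final add.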
10845     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // Splat of -16; only the low 5 bits are used, so this acts as a +16
    // shift/rotate amount.
10847     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10848     SDValue RHSSwap =   // = vrlw RHS, 16
10849       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10850 
10851     // Shrinkify inputs to v8i16.
10852     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10853     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10854     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10855 
10856     // Low parts multiplied together, generating 32-bit results (we ignore the
10857     // top parts).
10858     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10859                                         LHS, RHS, DAG, dl, MVT::v4i32);
10860 
10861     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10862                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10863     // Shift the high parts up 16 bits.
10864     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10865                               Neg16, DAG, dl);
10866     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10867   } else if (Op.getValueType() == MVT::v16i8) {
10868     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10869     bool isLittleEndian = Subtarget.isLittleEndian();
10870 
    // Multiply the even 8-bit parts, producing 16-bit products.
10872     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10873                                            LHS, RHS, DAG, dl, MVT::v8i16);
10874     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10875 
    // Multiply the odd 8-bit parts, producing 16-bit products.
10877     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10878                                           LHS, RHS, DAG, dl, MVT::v8i16);
10879     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10880 
10881     // Merge the results together.  Because vmuleub and vmuloub are
10882     // instructions with a big-endian bias, we must reverse the
10883     // element numbering and reverse the meaning of "odd" and "even"
10884     // when generating little endian code.
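    // For big endian this builds the mask <1, 17, 3, 19, ...> over
    // (EvenParts, OddParts); for little endian it builds <0, 16, 2, 18, ...>
    // over (OddParts, EvenParts).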
10885     int Ops[16];
10886     for (unsigned i = 0; i != 8; ++i) {
10887       if (isLittleEndian) {
10888         Ops[i*2  ] = 2*i;
10889         Ops[i*2+1] = 2*i+16;
10890       } else {
10891         Ops[i*2  ] = 2*i+1;
10892         Ops[i*2+1] = 2*i+1+16;
10893       }
10894     }
10895     if (isLittleEndian)
10896       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10897     else
10898       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10899   } else {
10900     llvm_unreachable("Unknown mul to lower!");
10901   }
10902 }
10903 
// Custom lowering for fpext v2f32 to v2f64
10905 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10906 
10907   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10908          "Should only be called for ISD::FP_EXTEND");
10909 
10910   // FIXME: handle extends from half precision float vectors on P9.
10911   // We only want to custom lower an extend from v2f32 to v2f64.
10912   if (Op.getValueType() != MVT::v2f64 ||
10913       Op.getOperand(0).getValueType() != MVT::v2f32)
10914     return SDValue();
10915 
10916   SDLoc dl(Op);
10917   SDValue Op0 = Op.getOperand(0);
10918 
10919   switch (Op0.getOpcode()) {
10920   default:
10921     return SDValue();
10922   case ISD::EXTRACT_SUBVECTOR: {
10923     assert(Op0.getNumOperands() == 2 &&
10924            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10925            "Node should have 2 operands with second one being a constant!");
10926 
10927     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10928       return SDValue();
10929 
    // Custom lowering is only done for the high or low doubleword.
10931     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10932     if (Idx % 2 != 0)
10933       return SDValue();
10934 
10935     // Since input is v4f32, at this point Idx is either 0 or 2.
10936     // Shift to get the doubleword position we want.
10937     int DWord = Idx >> 1;
10938 
10939     // High and low word positions are different on little endian.
10940     if (Subtarget.isLittleEndian())
10941       DWord ^= 0x1;
10942 
10943     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10944                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10945   }
10946   case ISD::FADD:
10947   case ISD::FMUL:
10948   case ISD::FSUB: {
10949     SDValue NewLoad[2];
10950     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both inputs are loads.
10952       SDValue LdOp = Op0.getOperand(i);
10953       if (LdOp.getOpcode() != ISD::LOAD)
10954         return SDValue();
10955       // Generate new load node.
10956       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10957       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10958       NewLoad[i] = DAG.getMemIntrinsicNode(
10959           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10960           LD->getMemoryVT(), LD->getMemOperand());
10961     }
10962     SDValue NewOp =
10963         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10964                     NewLoad[1], Op0.getNode()->getFlags());
10965     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10966                        DAG.getConstant(0, dl, MVT::i32));
10967   }
10968   case ISD::LOAD: {
10969     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10970     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10971     SDValue NewLd = DAG.getMemIntrinsicNode(
10972         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10973         LD->getMemoryVT(), LD->getMemOperand());
10974     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10975                        DAG.getConstant(0, dl, MVT::i32));
10976   }
10977   }
  llvm_unreachable("ERROR: Should return for all cases within switch.");
10979 }
10980 
10981 /// LowerOperation - Provide custom lowering hooks for some operations.
10982 ///
10983 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10984   switch (Op.getOpcode()) {
10985   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10986   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10987   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10988   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10989   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10990   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10991   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10992   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10993   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10994 
10995   // Variable argument lowering.
10996   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10997   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10998   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10999 
11000   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
11001   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11002   case ISD::GET_DYNAMIC_AREA_OFFSET:
11003     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11004 
11005   // Exception handling lowering.
11006   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
11007   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
11008   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
11009 
11010   case ISD::LOAD:               return LowerLOAD(Op, DAG);
11011   case ISD::STORE:              return LowerSTORE(Op, DAG);
11012   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
11013   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
11014   case ISD::STRICT_FP_TO_UINT:
11015   case ISD::STRICT_FP_TO_SINT:
11016   case ISD::FP_TO_UINT:
11017   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11018   case ISD::STRICT_UINT_TO_FP:
11019   case ISD::STRICT_SINT_TO_FP:
11020   case ISD::UINT_TO_FP:
11021   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
11022   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
11023 
11024   // Lower 64-bit shifts.
11025   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
11026   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
11027   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
11028 
11029   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
11030   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
11031 
11032   // Vector-related lowering.
11033   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
11034   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
11035   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11036   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
11037   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
11038   case ISD::MUL:                return LowerMUL(Op, DAG);
11039   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11040   case ISD::ROTL:               return LowerROTL(Op, DAG);
11041 
11042   // For counter-based loop handling.
11043   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
11044 
11045   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
11046 
11047   // Frame & Return address.
11048   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
11049   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
11050 
11051   case ISD::INTRINSIC_VOID:
11052     return LowerINTRINSIC_VOID(Op, DAG);
11053   case ISD::BSWAP:
11054     return LowerBSWAP(Op, DAG);
11055   case ISD::ATOMIC_CMP_SWAP:
11056     return LowerATOMIC_CMP_SWAP(Op, DAG);
11057   }
11058 }
11059 
11060 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11061                                            SmallVectorImpl<SDValue>&Results,
11062                                            SelectionDAG &DAG) const {
11063   SDLoc dl(N);
11064   switch (N->getOpcode()) {
11065   default:
11066     llvm_unreachable("Do not know how to custom type legalize this operation!");
11067   case ISD::READCYCLECOUNTER: {
11068     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11069     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11070 
11071     Results.push_back(
11072         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11073     Results.push_back(RTB.getValue(2));
11074     break;
11075   }
11076   case ISD::INTRINSIC_W_CHAIN: {
11077     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11078         Intrinsic::loop_decrement)
11079       break;
11080 
11081     assert(N->getValueType(0) == MVT::i1 &&
11082            "Unexpected result type for CTR decrement intrinsic");
11083     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11084                                  N->getValueType(0));
11085     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11086     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11087                                  N->getOperand(1));
11088 
11089     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11090     Results.push_back(NewInt.getValue(1));
11091     break;
11092   }
11093   case ISD::VAARG: {
11094     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11095       return;
11096 
11097     EVT VT = N->getValueType(0);
11098 
11099     if (VT == MVT::i64) {
11100       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11101 
11102       Results.push_back(NewNode);
11103       Results.push_back(NewNode.getValue(1));
11104     }
11105     return;
11106   }
11107   case ISD::STRICT_FP_TO_SINT:
11108   case ISD::STRICT_FP_TO_UINT:
11109   case ISD::FP_TO_SINT:
11110   case ISD::FP_TO_UINT:
11111     // LowerFP_TO_INT() can only handle f32 and f64.
11112     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11113         MVT::ppcf128)
11114       return;
11115     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11116     return;
11117   case ISD::TRUNCATE: {
11118     if (!N->getValueType(0).isVector())
11119       return;
11120     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11121     if (Lowered)
11122       Results.push_back(Lowered);
11123     return;
11124   }
11125   case ISD::FSHL:
11126   case ISD::FSHR:
11127     // Don't handle funnel shifts here.
11128     return;
11129   case ISD::BITCAST:
11130     // Don't handle bitcast here.
11131     return;
  case ISD::FP_EXTEND: {
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
11137   }
11138 }
11139 
11140 //===----------------------------------------------------------------------===//
11141 //  Other Lowering Code
11142 //===----------------------------------------------------------------------===//
11143 
11144 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11145   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11146   Function *Func = Intrinsic::getDeclaration(M, Id);
11147   return Builder.CreateCall(Func, {});
11148 }
11149 
// The mappings for emitLeadingFence/emitTrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
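//
// As a rough illustration of how the two hooks combine (the cfence pseudo is
// assumed to expand after the load on PPC64):
//   store seq_cst -> sync; st
//   store release -> lwsync; st
//   load  seq_cst -> sync; ld; cfence   (lwsync instead of cfence on 32-bit)
//   load  acquire -> ld; cfence         (lwsync instead of cfence on 32-bit)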
11152 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11153                                                  Instruction *Inst,
11154                                                  AtomicOrdering Ord) const {
11155   if (Ord == AtomicOrdering::SequentiallyConsistent)
11156     return callIntrinsic(Builder, Intrinsic::ppc_sync);
11157   if (isReleaseOrStronger(Ord))
11158     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11159   return nullptr;
11160 }
11161 
11162 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11163                                                   Instruction *Inst,
11164                                                   AtomicOrdering Ord) const {
11165   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11166     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11167     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11168     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11169     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11170       return Builder.CreateCall(
11171           Intrinsic::getDeclaration(
11172               Builder.GetInsertBlock()->getParent()->getParent(),
11173               Intrinsic::ppc_cfence, {Inst->getType()}),
11174           {Inst});
11175     // FIXME: Can use isync for rmw operation.
11176     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11177   }
11178   return nullptr;
11179 }
11180 
11181 MachineBasicBlock *
11182 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11183                                     unsigned AtomicSize,
11184                                     unsigned BinOpcode,
11185                                     unsigned CmpOpcode,
11186                                     unsigned CmpPred) const {
11187   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11188   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11189 
11190   auto LoadMnemonic = PPC::LDARX;
11191   auto StoreMnemonic = PPC::STDCX;
11192   switch (AtomicSize) {
11193   default:
11194     llvm_unreachable("Unexpected size of atomic entity");
11195   case 1:
11196     LoadMnemonic = PPC::LBARX;
11197     StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics are required for sizes below 4 bytes");
11199     break;
11200   case 2:
11201     LoadMnemonic = PPC::LHARX;
11202     StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics are required for sizes below 4 bytes");
11204     break;
11205   case 4:
11206     LoadMnemonic = PPC::LWARX;
11207     StoreMnemonic = PPC::STWCX;
11208     break;
11209   case 8:
11210     LoadMnemonic = PPC::LDARX;
11211     StoreMnemonic = PPC::STDCX;
11212     break;
11213   }
11214 
11215   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11216   MachineFunction *F = BB->getParent();
11217   MachineFunction::iterator It = ++BB->getIterator();
11218 
11219   Register dest = MI.getOperand(0).getReg();
11220   Register ptrA = MI.getOperand(1).getReg();
11221   Register ptrB = MI.getOperand(2).getReg();
11222   Register incr = MI.getOperand(3).getReg();
11223   DebugLoc dl = MI.getDebugLoc();
11224 
11225   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11226   MachineBasicBlock *loop2MBB =
11227     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11228   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11229   F->insert(It, loopMBB);
11230   if (CmpOpcode)
11231     F->insert(It, loop2MBB);
11232   F->insert(It, exitMBB);
11233   exitMBB->splice(exitMBB->begin(), BB,
11234                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11235   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11236 
11237   MachineRegisterInfo &RegInfo = F->getRegInfo();
11238   Register TmpReg = (!BinOpcode) ? incr :
11239     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11240                                            : &PPC::GPRCRegClass);
11241 
11242   //  thisMBB:
11243   //   ...
11244   //   fallthrough --> loopMBB
11245   BB->addSuccessor(loopMBB);
11246 
11247   //  loopMBB:
11248   //   l[wd]arx dest, ptr
11249   //   add r0, dest, incr
11250   //   st[wd]cx. r0, ptr
11251   //   bne- loopMBB
11252   //   fallthrough --> exitMBB
11253 
11254   // For max/min...
11255   //  loopMBB:
11256   //   l[wd]arx dest, ptr
11257   //   cmpl?[wd] incr, dest
11258   //   bgt exitMBB
11259   //  loop2MBB:
11260   //   st[wd]cx. dest, ptr
11261   //   bne- loopMBB
11262   //   fallthrough --> exitMBB
11263 
11264   BB = loopMBB;
11265   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11266     .addReg(ptrA).addReg(ptrB);
11267   if (BinOpcode)
11268     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11269   if (CmpOpcode) {
11270     // Signed comparisons of byte or halfword values must be sign-extended.
11271     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11272       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11273       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11274               ExtReg).addReg(dest);
11275       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11276         .addReg(incr).addReg(ExtReg);
11277     } else
11278       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11279         .addReg(incr).addReg(dest);
11280 
11281     BuildMI(BB, dl, TII->get(PPC::BCC))
11282       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11283     BB->addSuccessor(loop2MBB);
11284     BB->addSuccessor(exitMBB);
11285     BB = loop2MBB;
11286   }
11287   BuildMI(BB, dl, TII->get(StoreMnemonic))
11288     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11289   BuildMI(BB, dl, TII->get(PPC::BCC))
11290     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11291   BB->addSuccessor(loopMBB);
11292   BB->addSuccessor(exitMBB);
11293 
11294   //  exitMBB:
11295   //   ...
11296   BB = exitMBB;
11297   return BB;
11298 }
11299 
11300 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11301     MachineInstr &MI, MachineBasicBlock *BB,
11302     bool is8bit, // operation
11303     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them.
11305   if (Subtarget.hasPartwordAtomics())
11306     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11307                             CmpPred);
11308 
11309   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11310   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64-bit mode we have to use 64-bit registers for addresses, even though
  // lwarx/stwcx. only operate on 32-bit words.  With the 32-bit atomics we can
  // use address registers without caring whether they're 32 or 64 bits, but
  // here we're doing actual arithmetic on the addresses.
11315   bool is64bit = Subtarget.isPPC64();
11316   bool isLittleEndian = Subtarget.isLittleEndian();
11317   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11318 
11319   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11320   MachineFunction *F = BB->getParent();
11321   MachineFunction::iterator It = ++BB->getIterator();
11322 
11323   Register dest = MI.getOperand(0).getReg();
11324   Register ptrA = MI.getOperand(1).getReg();
11325   Register ptrB = MI.getOperand(2).getReg();
11326   Register incr = MI.getOperand(3).getReg();
11327   DebugLoc dl = MI.getDebugLoc();
11328 
11329   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11330   MachineBasicBlock *loop2MBB =
11331       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11332   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11333   F->insert(It, loopMBB);
11334   if (CmpOpcode)
11335     F->insert(It, loop2MBB);
11336   F->insert(It, exitMBB);
11337   exitMBB->splice(exitMBB->begin(), BB,
11338                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11339   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11340 
11341   MachineRegisterInfo &RegInfo = F->getRegInfo();
11342   const TargetRegisterClass *RC =
11343       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11344   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11345 
11346   Register PtrReg = RegInfo.createVirtualRegister(RC);
11347   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11348   Register ShiftReg =
11349       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11350   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11351   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11352   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11353   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11354   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11355   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11356   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11357   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11358   Register Ptr1Reg;
11359   Register TmpReg =
11360       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11361 
11362   //  thisMBB:
11363   //   ...
11364   //   fallthrough --> loopMBB
11365   BB->addSuccessor(loopMBB);
11366 
11367   // The 4-byte load must be aligned, while a char or short may be
11368   // anywhere in the word.  Hence all this nasty bookkeeping code.
11369   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11370   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11371   //   xori shift, shift1, 24 [16]
11372   //   rlwinm ptr, ptr1, 0, 0, 29
11373   //   slw incr2, incr, shift
11374   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11375   //   slw mask, mask2, shift
11376   //  loopMBB:
11377   //   lwarx tmpDest, ptr
11378   //   add tmp, tmpDest, incr2
11379   //   andc tmp2, tmpDest, mask
11380   //   and tmp3, tmp, mask
11381   //   or tmp4, tmp3, tmp2
11382   //   stwcx. tmp4, ptr
11383   //   bne- loopMBB
11384   //   fallthrough --> exitMBB
11385   //   srw dest, tmpDest, shift
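  //
  // Worked example (illustrative values only): big-endian, is8bit, and a byte
  // offset of 2 within the aligned word gives
  //   shift1 = (ptr1 << 3) & 0x18 = 16
  //   shift  = shift1 ^ 24        = 8
  // so the byte occupies bits <15:8>, incr2 and mask are shifted left by 8,
  // and the final srw by 8 moves the result back down to the low byte.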
11386   if (ptrA != ZeroReg) {
11387     Ptr1Reg = RegInfo.createVirtualRegister(RC);
11388     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11389         .addReg(ptrA)
11390         .addReg(ptrB);
11391   } else {
11392     Ptr1Reg = ptrB;
11393   }
  // We need to use the 32-bit subregister to avoid a register class mismatch
  // in 64-bit mode.
11396   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11397       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11398       .addImm(3)
11399       .addImm(27)
11400       .addImm(is8bit ? 28 : 27);
11401   if (!isLittleEndian)
11402     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11403         .addReg(Shift1Reg)
11404         .addImm(is8bit ? 24 : 16);
11405   if (is64bit)
11406     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11407         .addReg(Ptr1Reg)
11408         .addImm(0)
11409         .addImm(61);
11410   else
11411     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11412         .addReg(Ptr1Reg)
11413         .addImm(0)
11414         .addImm(0)
11415         .addImm(29);
11416   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11417   if (is8bit)
11418     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11419   else {
11420     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11421     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11422         .addReg(Mask3Reg)
11423         .addImm(65535);
11424   }
11425   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11426       .addReg(Mask2Reg)
11427       .addReg(ShiftReg);
11428 
11429   BB = loopMBB;
11430   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11431       .addReg(ZeroReg)
11432       .addReg(PtrReg);
11433   if (BinOpcode)
11434     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11435         .addReg(Incr2Reg)
11436         .addReg(TmpDestReg);
11437   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11438       .addReg(TmpDestReg)
11439       .addReg(MaskReg);
11440   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11441   if (CmpOpcode) {
11442     // For unsigned comparisons, we can directly compare the shifted values.
11443     // For signed comparisons we shift and sign extend.
11444     Register SReg = RegInfo.createVirtualRegister(GPRC);
11445     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11446         .addReg(TmpDestReg)
11447         .addReg(MaskReg);
11448     unsigned ValueReg = SReg;
11449     unsigned CmpReg = Incr2Reg;
11450     if (CmpOpcode == PPC::CMPW) {
11451       ValueReg = RegInfo.createVirtualRegister(GPRC);
11452       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11453           .addReg(SReg)
11454           .addReg(ShiftReg);
11455       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11456       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11457           .addReg(ValueReg);
11458       ValueReg = ValueSReg;
11459       CmpReg = incr;
11460     }
11461     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11462         .addReg(CmpReg)
11463         .addReg(ValueReg);
11464     BuildMI(BB, dl, TII->get(PPC::BCC))
11465         .addImm(CmpPred)
11466         .addReg(PPC::CR0)
11467         .addMBB(exitMBB);
11468     BB->addSuccessor(loop2MBB);
11469     BB->addSuccessor(exitMBB);
11470     BB = loop2MBB;
11471   }
11472   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11473   BuildMI(BB, dl, TII->get(PPC::STWCX))
11474       .addReg(Tmp4Reg)
11475       .addReg(ZeroReg)
11476       .addReg(PtrReg);
11477   BuildMI(BB, dl, TII->get(PPC::BCC))
11478       .addImm(PPC::PRED_NE)
11479       .addReg(PPC::CR0)
11480       .addMBB(loopMBB);
11481   BB->addSuccessor(loopMBB);
11482   BB->addSuccessor(exitMBB);
11483 
11484   //  exitMBB:
11485   //   ...
11486   BB = exitMBB;
11487   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11488       .addReg(TmpDestReg)
11489       .addReg(ShiftReg);
11490   return BB;
11491 }
11492 
11493 llvm::MachineBasicBlock *
11494 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11495                                     MachineBasicBlock *MBB) const {
11496   DebugLoc DL = MI.getDebugLoc();
11497   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11498   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11499 
11500   MachineFunction *MF = MBB->getParent();
11501   MachineRegisterInfo &MRI = MF->getRegInfo();
11502 
11503   const BasicBlock *BB = MBB->getBasicBlock();
11504   MachineFunction::iterator I = ++MBB->getIterator();
11505 
11506   Register DstReg = MI.getOperand(0).getReg();
11507   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11508   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11509   Register mainDstReg = MRI.createVirtualRegister(RC);
11510   Register restoreDstReg = MRI.createVirtualRegister(RC);
11511 
11512   MVT PVT = getPointerTy(MF->getDataLayout());
11513   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11514          "Invalid Pointer Size!");
11515   // For v = setjmp(buf), we generate
11516   //
11517   // thisMBB:
11518   //  SjLjSetup mainMBB
11519   //  bl mainMBB
11520   //  v_restore = 1
11521   //  b sinkMBB
11522   //
11523   // mainMBB:
11524   //  buf[LabelOffset] = LR
11525   //  v_main = 0
11526   //
11527   // sinkMBB:
11528   //  v = phi(main, restore)
11529   //
11530 
11531   MachineBasicBlock *thisMBB = MBB;
11532   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11533   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11534   MF->insert(I, mainMBB);
11535   MF->insert(I, sinkMBB);
11536 
11537   MachineInstrBuilder MIB;
11538 
11539   // Transfer the remainder of BB and its successor edges to sinkMBB.
11540   sinkMBB->splice(sinkMBB->begin(), MBB,
11541                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11542   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11543 
11544   // Note that the structure of the jmp_buf used here is not compatible
11545   // with that used by libc, and is not designed to be. Specifically, it
11546   // stores only those 'reserved' registers that LLVM does not otherwise
11547   // understand how to spill. Also, by convention, by the time this
11548   // intrinsic is called, Clang has already stored the frame address in the
11549   // first slot of the buffer and stack address in the third. Following the
11550   // X86 target code, we'll store the jump address in the second slot. We also
11551   // need to save the TOC pointer (R2) to handle jumps between shared
11552   // libraries, and that will be stored in the fourth slot. The thread
11553   // identifier (R13) is not affected.
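  //
  // Putting that together, the buffer layout assumed here (in pointer-sized
  // slots, and intentionally not libc-compatible) is:
  //   buf[0] = frame address        (stored by the front end)
  //   buf[1] = jump address (LR)    (stored below at LabelOffset)
  //   buf[2] = stack address        (stored by the front end)
  //   buf[3] = TOC pointer (R2)     (64-bit ELF only)
  //   buf[4] = base pointer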
11554 
11555   // thisMBB:
11556   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11557   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11558   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11559 
  // Prepare the IP in a register.
11561   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11562   Register LabelReg = MRI.createVirtualRegister(PtrRC);
11563   Register BufReg = MI.getOperand(1).getReg();
11564 
11565   if (Subtarget.is64BitELFABI()) {
11566     setUsesTOCBasePtr(*MBB->getParent());
11567     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11568               .addReg(PPC::X2)
11569               .addImm(TOCOffset)
11570               .addReg(BufReg)
11571               .cloneMemRefs(MI);
11572   }
11573 
  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until PEI.
11576   unsigned BaseReg;
11577   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11578     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11579   else
11580     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11581 
11582   MIB = BuildMI(*thisMBB, MI, DL,
11583                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11584             .addReg(BaseReg)
11585             .addImm(BPOffset)
11586             .addReg(BufReg)
11587             .cloneMemRefs(MI);
11588 
11589   // Setup
11590   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11591   MIB.addRegMask(TRI->getNoPreservedMask());
11592 
11593   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11594 
11595   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11596           .addMBB(mainMBB);
11597   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11598 
11599   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11600   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11601 
11602   // mainMBB:
11603   //  mainDstReg = 0
11604   MIB =
11605       BuildMI(mainMBB, DL,
11606               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11607 
11608   // Store IP
11609   if (Subtarget.isPPC64()) {
11610     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11611             .addReg(LabelReg)
11612             .addImm(LabelOffset)
11613             .addReg(BufReg);
11614   } else {
11615     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11616             .addReg(LabelReg)
11617             .addImm(LabelOffset)
11618             .addReg(BufReg);
11619   }
11620   MIB.cloneMemRefs(MI);
11621 
11622   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11623   mainMBB->addSuccessor(sinkMBB);
11624 
11625   // sinkMBB:
11626   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11627           TII->get(PPC::PHI), DstReg)
11628     .addReg(mainDstReg).addMBB(mainMBB)
11629     .addReg(restoreDstReg).addMBB(thisMBB);
11630 
11631   MI.eraseFromParent();
11632   return sinkMBB;
11633 }
11634 
11635 MachineBasicBlock *
11636 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11637                                      MachineBasicBlock *MBB) const {
11638   DebugLoc DL = MI.getDebugLoc();
11639   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11640 
11641   MachineFunction *MF = MBB->getParent();
11642   MachineRegisterInfo &MRI = MF->getRegInfo();
11643 
11644   MVT PVT = getPointerTy(MF->getDataLayout());
11645   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11646          "Invalid Pointer Size!");
11647 
11648   const TargetRegisterClass *RC =
11649     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11650   Register Tmp = MRI.createVirtualRegister(RC);
11651   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11652   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11653   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11654   unsigned BP =
11655       (PVT == MVT::i64)
11656           ? PPC::X30
11657           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11658                                                               : PPC::R30);
11659 
11660   MachineInstrBuilder MIB;
11661 
11662   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11663   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11664   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11665   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11666 
11667   Register BufReg = MI.getOperand(0).getReg();
11668 
11669   // Reload FP (the jumped-to function may not have had a
11670   // frame pointer, and if so, then its r31 will be restored
11671   // as necessary).
11672   if (PVT == MVT::i64) {
11673     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11674             .addImm(0)
11675             .addReg(BufReg);
11676   } else {
11677     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11678             .addImm(0)
11679             .addReg(BufReg);
11680   }
11681   MIB.cloneMemRefs(MI);
11682 
11683   // Reload IP
11684   if (PVT == MVT::i64) {
11685     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11686             .addImm(LabelOffset)
11687             .addReg(BufReg);
11688   } else {
11689     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11690             .addImm(LabelOffset)
11691             .addReg(BufReg);
11692   }
11693   MIB.cloneMemRefs(MI);
11694 
11695   // Reload SP
11696   if (PVT == MVT::i64) {
11697     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11698             .addImm(SPOffset)
11699             .addReg(BufReg);
11700   } else {
11701     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11702             .addImm(SPOffset)
11703             .addReg(BufReg);
11704   }
11705   MIB.cloneMemRefs(MI);
11706 
11707   // Reload BP
11708   if (PVT == MVT::i64) {
11709     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11710             .addImm(BPOffset)
11711             .addReg(BufReg);
11712   } else {
11713     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11714             .addImm(BPOffset)
11715             .addReg(BufReg);
11716   }
11717   MIB.cloneMemRefs(MI);
11718 
11719   // Reload TOC
11720   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11721     setUsesTOCBasePtr(*MBB->getParent());
11722     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11723               .addImm(TOCOffset)
11724               .addReg(BufReg)
11725               .cloneMemRefs(MI);
11726   }
11727 
11728   // Jump
11729   BuildMI(*MBB, MI, DL,
11730           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11731   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11732 
11733   MI.eraseFromParent();
11734   return MBB;
11735 }
11736 
11737 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11738   // If the function specifically requests inline stack probes, emit them.
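  // For example (illustrative IR only), a function defined as
  //   define void @foo() "probe-stack"="inline-asm" { ... }
  // takes the inline probing path.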
11739   if (MF.getFunction().hasFnAttribute("probe-stack"))
11740     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11741            "inline-asm";
11742   return false;
11743 }
11744 
11745 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11746   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11747   unsigned StackAlign = TFI->getStackAlignment();
11748   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11749          "Unexpected stack alignment");
11750   // The default stack probe size is 4096 if the function has no
11751   // stack-probe-size attribute.
11752   unsigned StackProbeSize = 4096;
11753   const Function &Fn = MF.getFunction();
11754   if (Fn.hasFnAttribute("stack-probe-size"))
11755     Fn.getFnAttribute("stack-probe-size")
11756         .getValueAsString()
11757         .getAsInteger(0, StackProbeSize);
11758   // Round down to the stack alignment.
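  // For example (hypothetical values), a "stack-probe-size" of 1000 with a
  // 16-byte stack alignment yields 992.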
11759   StackProbeSize &= ~(StackAlign - 1);
11760   return StackProbeSize ? StackProbeSize : StackAlign;
11761 }
11762 
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future values of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop that probes the
// allocation block by block. Finally, it uses the pseudo instruction
// DYNAREAOFFSET to get the future value of MaxCallFrameSize so that it can
// compute the correct data area pointer.
11769 MachineBasicBlock *
11770 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11771                                     MachineBasicBlock *MBB) const {
11772   const bool isPPC64 = Subtarget.isPPC64();
11773   MachineFunction *MF = MBB->getParent();
11774   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11775   DebugLoc DL = MI.getDebugLoc();
11776   const unsigned ProbeSize = getStackProbeSize(*MF);
11777   const BasicBlock *ProbedBB = MBB->getBasicBlock();
11778   MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG for stack probing looks like this:
11780   //         +-----+
11781   //         | MBB |
11782   //         +--+--+
11783   //            |
11784   //       +----v----+
11785   //  +--->+ TestMBB +---+
11786   //  |    +----+----+   |
11787   //  |         |        |
11788   //  |   +-----v----+   |
11789   //  +---+ BlockMBB |   |
11790   //      +----------+   |
11791   //                     |
11792   //       +---------+   |
11793   //       | TailMBB +<--+
11794   //       +---------+
  // In MBB, calculate the previous frame pointer and the final stack pointer.
  // In TestMBB, test whether sp equals the final stack pointer; if so, jump to
  // TailMBB. In BlockMBB, probe and update sp in one store-with-update and
  // jump back to TestMBB. TailMBB is spliced in via \p MI.
11799   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11800   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11801   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11802 
11803   MachineFunction::iterator MBBIter = ++MBB->getIterator();
11804   MF->insert(MBBIter, TestMBB);
11805   MF->insert(MBBIter, BlockMBB);
11806   MF->insert(MBBIter, TailMBB);
11807 
11808   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11809   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11810 
11811   Register DstReg = MI.getOperand(0).getReg();
11812   Register NegSizeReg = MI.getOperand(1).getReg();
11813   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11814   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11815   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11816   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11817 
  // Since the value of NegSizeReg might be realigned during prologue/epilogue
  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
  // actual FramePointer and NegSize.
11821   unsigned ProbeOpc;
11822   if (!MRI.hasOneNonDBGUse(NegSizeReg))
11823     ProbeOpc =
11824         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11825   else
    // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg and
    // NegSizeReg are allocated to the same physical register, avoiding a
    // redundant copy when NegSizeReg has only one use, namely the current MI,
    // which this pseudo instruction will replace.
11830     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11831                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11832   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11833       .addDef(ActualNegSizeReg)
11834       .addReg(NegSizeReg)
11835       .add(MI.getOperand(2))
11836       .add(MI.getOperand(3));
11837 
  // Calculate the final stack pointer, which equals SP + ActualNegSize.
11839   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11840           FinalStackPtr)
11841       .addReg(SPReg)
11842       .addReg(ActualNegSizeReg);
11843 
11844   // Materialize a scratch register for update.
11845   int64_t NegProbeSize = -(int64_t)ProbeSize;
11846   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11847   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11848   if (!isInt<16>(NegProbeSize)) {
11849     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11850     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11851         .addImm(NegProbeSize >> 16);
11852     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11853             ScratchReg)
11854         .addReg(TempReg)
11855         .addImm(NegProbeSize & 0xFFFF);
11856   } else
11857     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11858         .addImm(NegProbeSize);
11859 
11860   {
11861     // Probing leading residual part.
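    //
    // Worked example (values for illustration only): with
    // ActualNegSize = -4128 and ProbeSize = 4096 (so ScratchReg holds -4096):
    //   Div    = -4128 / -4096  = 1
    //   Mul    = 1 * -4096      = -4096
    //   NegMod = -4128 - -4096  = -32
    // The store-with-update below probes and moves sp by the -32 residual;
    // the loop in BlockMBB then probes the remaining -4096 in one step.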
11862     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11863     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11864         .addReg(ActualNegSizeReg)
11865         .addReg(ScratchReg);
11866     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11867     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11868         .addReg(Div)
11869         .addReg(ScratchReg);
11870     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11871     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11872         .addReg(Mul)
11873         .addReg(ActualNegSizeReg);
11874     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11875         .addReg(FramePointer)
11876         .addReg(SPReg)
11877         .addReg(NegMod);
11878   }
11879 
11880   {
11881     // Remaining part should be multiple of ProbeSize.
11882     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11883     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11884         .addReg(SPReg)
11885         .addReg(FinalStackPtr);
11886     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11887         .addImm(PPC::PRED_EQ)
11888         .addReg(CmpResult)
11889         .addMBB(TailMBB);
11890     TestMBB->addSuccessor(BlockMBB);
11891     TestMBB->addSuccessor(TailMBB);
11892   }
11893 
11894   {
11895     // Touch the block.
11896     // |P...|P...|P...
11897     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11898         .addReg(FramePointer)
11899         .addReg(SPReg)
11900         .addReg(ScratchReg);
11901     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11902     BlockMBB->addSuccessor(TestMBB);
11903   }
11904 
  // The calculation of MaxCallFrameSize is deferred to prologue/epilogue
  // insertion, so use the DYNAREAOFFSET pseudo instruction to get its future
  // result.
11907   Register MaxCallFrameSizeReg =
11908       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11909   BuildMI(TailMBB, DL,
11910           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11911           MaxCallFrameSizeReg)
11912       .add(MI.getOperand(2))
11913       .add(MI.getOperand(3));
11914   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11915       .addReg(SPReg)
11916       .addReg(MaxCallFrameSizeReg);
11917 
11918   // Splice instructions after MI to TailMBB.
11919   TailMBB->splice(TailMBB->end(), MBB,
11920                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11921   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11922   MBB->addSuccessor(TestMBB);
11923 
11924   // Delete the pseudo instruction.
11925   MI.eraseFromParent();
11926 
11927   ++NumDynamicAllocaProbed;
11928   return TailMBB;
11929 }
11930 
11931 MachineBasicBlock *
11932 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11933                                                MachineBasicBlock *BB) const {
11934   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11935       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11936     if (Subtarget.is64BitELFABI() &&
11937         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11938         !Subtarget.isUsingPCRelativeCalls()) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // would confuse it with a regular operand. Instead, add the dependence
      // here.
11944       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11945     }
11946 
11947     return emitPatchPoint(MI, BB);
11948   }
11949 
11950   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11951       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11952     return emitEHSjLjSetJmp(MI, BB);
11953   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11954              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11955     return emitEHSjLjLongJmp(MI, BB);
11956   }
11957 
11958   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11959 
11960   // To "insert" these instructions we actually have to insert their
11961   // control-flow patterns.
11962   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11963   MachineFunction::iterator It = ++BB->getIterator();
11964 
11965   MachineFunction *F = BB->getParent();
11966 
11967   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11968       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11969       MI.getOpcode() == PPC::SELECT_I8) {
11970     SmallVector<MachineOperand, 2> Cond;
11971     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11972         MI.getOpcode() == PPC::SELECT_CC_I8)
11973       Cond.push_back(MI.getOperand(4));
11974     else
11975       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11976     Cond.push_back(MI.getOperand(1));
11977 
11978     DebugLoc dl = MI.getDebugLoc();
11979     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11980                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11981   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11982              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11983              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11984              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11985              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11986              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11987              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11988              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11989              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11990              MI.getOpcode() == PPC::SELECT_F4 ||
11991              MI.getOpcode() == PPC::SELECT_F8 ||
11992              MI.getOpcode() == PPC::SELECT_F16 ||
11993              MI.getOpcode() == PPC::SELECT_SPE ||
11994              MI.getOpcode() == PPC::SELECT_SPE4 ||
11995              MI.getOpcode() == PPC::SELECT_VRRC ||
11996              MI.getOpcode() == PPC::SELECT_VSFRC ||
11997              MI.getOpcode() == PPC::SELECT_VSSRC ||
11998              MI.getOpcode() == PPC::SELECT_VSRC) {
11999     // The incoming instruction knows the destination vreg to set, the
12000     // condition code register to branch on, the true/false values to
12001     // select between, and a branch opcode to use.
12002 
12003     //  thisMBB:
12004     //  ...
12005     //   TrueVal = ...
12006     //   cmpTY ccX, r1, r2
12007     //   bCC copy1MBB
12008     //   fallthrough --> copy0MBB
12009     MachineBasicBlock *thisMBB = BB;
12010     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12011     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12012     DebugLoc dl = MI.getDebugLoc();
12013     F->insert(It, copy0MBB);
12014     F->insert(It, sinkMBB);
12015 
12016     // Transfer the remainder of BB and its successor edges to sinkMBB.
12017     sinkMBB->splice(sinkMBB->begin(), BB,
12018                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12019     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12020 
12021     // Next, add the true and fallthrough blocks as its successors.
12022     BB->addSuccessor(copy0MBB);
12023     BB->addSuccessor(sinkMBB);
12024 
12025     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12026         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12027         MI.getOpcode() == PPC::SELECT_F16 ||
12028         MI.getOpcode() == PPC::SELECT_SPE4 ||
12029         MI.getOpcode() == PPC::SELECT_SPE ||
12030         MI.getOpcode() == PPC::SELECT_VRRC ||
12031         MI.getOpcode() == PPC::SELECT_VSFRC ||
12032         MI.getOpcode() == PPC::SELECT_VSSRC ||
12033         MI.getOpcode() == PPC::SELECT_VSRC) {
12034       BuildMI(BB, dl, TII->get(PPC::BC))
12035           .addReg(MI.getOperand(1).getReg())
12036           .addMBB(sinkMBB);
12037     } else {
12038       unsigned SelectPred = MI.getOperand(4).getImm();
12039       BuildMI(BB, dl, TII->get(PPC::BCC))
12040           .addImm(SelectPred)
12041           .addReg(MI.getOperand(1).getReg())
12042           .addMBB(sinkMBB);
12043     }
12044 
12045     //  copy0MBB:
12046     //   %FalseValue = ...
12047     //   # fallthrough to sinkMBB
12048     BB = copy0MBB;
12049 
12050     // Update machine-CFG edges
12051     BB->addSuccessor(sinkMBB);
12052 
12053     //  sinkMBB:
12054     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12055     //  ...
12056     BB = sinkMBB;
12057     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12058         .addReg(MI.getOperand(3).getReg())
12059         .addMBB(copy0MBB)
12060         .addReg(MI.getOperand(2).getReg())
12061         .addMBB(thisMBB);
12062   } else if (MI.getOpcode() == PPC::ReadTB) {
12063     // To read the 64-bit time-base register on a 32-bit target, we read the
12064     // two halves. Should the counter have wrapped while it was being read, we
12065     // need to try again.
12066     // ...
12067     // readLoop:
12068     // mfspr Rx,TBU # load from TBU
12069     // mfspr Ry,TB  # load from TB
12070     // mfspr Rz,TBU # load from TBU
12071     // cmpw crX,Rx,Rz # check if 'old'='new'
12072     // bne readLoop   # branch if they're not equal
12073     // ...
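    // (SPR 269 is the upper half of the time base, TBU; SPR 268 is the lower
    // half, TB.)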
12074 
12075     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12076     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12077     DebugLoc dl = MI.getDebugLoc();
12078     F->insert(It, readMBB);
12079     F->insert(It, sinkMBB);
12080 
12081     // Transfer the remainder of BB and its successor edges to sinkMBB.
12082     sinkMBB->splice(sinkMBB->begin(), BB,
12083                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12084     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12085 
12086     BB->addSuccessor(readMBB);
12087     BB = readMBB;
12088 
12089     MachineRegisterInfo &RegInfo = F->getRegInfo();
12090     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12091     Register LoReg = MI.getOperand(0).getReg();
12092     Register HiReg = MI.getOperand(1).getReg();
12093 
12094     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12095     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12096     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12097 
12098     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12099 
12100     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12101         .addReg(HiReg)
12102         .addReg(ReadAgainReg);
12103     BuildMI(BB, dl, TII->get(PPC::BCC))
12104         .addImm(PPC::PRED_NE)
12105         .addReg(CmpReg)
12106         .addMBB(readMBB);
12107 
12108     BB->addSuccessor(readMBB);
12109     BB->addSuccessor(sinkMBB);
12110   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12111     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12112   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12113     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12114   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12115     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12116   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12117     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12118 
12119   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12120     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12121   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12122     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12123   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12124     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12125   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12126     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12127 
12128   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12129     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12130   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12131     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12132   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12133     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12134   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12135     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12136 
12137   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12138     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12139   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12140     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12141   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12142     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12143   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12144     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12145 
12146   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12147     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12148   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12149     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12150   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12151     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12152   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12153     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12154 
12155   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12156     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12157   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12158     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12159   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12160     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12161   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12162     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12163 
12164   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12165     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12166   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12167     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12168   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12169     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12170   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12171     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12172 
12173   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12174     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12175   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12176     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12177   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12178     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12179   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12180     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12181 
12182   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12183     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12184   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12185     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12186   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12187     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12188   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12189     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12190 
12191   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12192     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12193   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12194     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12195   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12196     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12197   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12198     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12199 
12200   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12201     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12202   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12203     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12204   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12205     BB = EmitAtomicBinary(MI, BB, 4, 0);
12206   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12207     BB = EmitAtomicBinary(MI, BB, 8, 0);
12208   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12209            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12210            (Subtarget.hasPartwordAtomics() &&
12211             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12212            (Subtarget.hasPartwordAtomics() &&
12213             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12214     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12215 
12216     auto LoadMnemonic = PPC::LDARX;
12217     auto StoreMnemonic = PPC::STDCX;
12218     switch (MI.getOpcode()) {
12219     default:
12220       llvm_unreachable("Compare and swap of unknown size");
12221     case PPC::ATOMIC_CMP_SWAP_I8:
12222       LoadMnemonic = PPC::LBARX;
12223       StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "Partword atomics unsupported.");
12225       break;
12226     case PPC::ATOMIC_CMP_SWAP_I16:
12227       LoadMnemonic = PPC::LHARX;
12228       StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "Partword atomics unsupported.");
12230       break;
12231     case PPC::ATOMIC_CMP_SWAP_I32:
12232       LoadMnemonic = PPC::LWARX;
12233       StoreMnemonic = PPC::STWCX;
12234       break;
12235     case PPC::ATOMIC_CMP_SWAP_I64:
12236       LoadMnemonic = PPC::LDARX;
12237       StoreMnemonic = PPC::STDCX;
12238       break;
12239     }
12240     Register dest = MI.getOperand(0).getReg();
12241     Register ptrA = MI.getOperand(1).getReg();
12242     Register ptrB = MI.getOperand(2).getReg();
12243     Register oldval = MI.getOperand(3).getReg();
12244     Register newval = MI.getOperand(4).getReg();
12245     DebugLoc dl = MI.getDebugLoc();
12246 
12247     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12248     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12249     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12250     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12251     F->insert(It, loop1MBB);
12252     F->insert(It, loop2MBB);
12253     F->insert(It, midMBB);
12254     F->insert(It, exitMBB);
12255     exitMBB->splice(exitMBB->begin(), BB,
12256                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12257     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12258 
12259     //  thisMBB:
12260     //   ...
12261     //   fallthrough --> loopMBB
12262     BB->addSuccessor(loop1MBB);
12263 
12264     // loop1MBB:
12265     //   l[bhwd]arx dest, ptr
12266     //   cmp[wd] dest, oldval
12267     //   bne- midMBB
12268     // loop2MBB:
12269     //   st[bhwd]cx. newval, ptr
12270     //   bne- loopMBB
12271     //   b exitBB
12272     // midMBB:
12273     //   st[bhwd]cx. dest, ptr
12274     // exitBB:
12275     BB = loop1MBB;
12276     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12277     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12278         .addReg(oldval)
12279         .addReg(dest);
12280     BuildMI(BB, dl, TII->get(PPC::BCC))
12281         .addImm(PPC::PRED_NE)
12282         .addReg(PPC::CR0)
12283         .addMBB(midMBB);
12284     BB->addSuccessor(loop2MBB);
12285     BB->addSuccessor(midMBB);
12286 
12287     BB = loop2MBB;
12288     BuildMI(BB, dl, TII->get(StoreMnemonic))
12289         .addReg(newval)
12290         .addReg(ptrA)
12291         .addReg(ptrB);
12292     BuildMI(BB, dl, TII->get(PPC::BCC))
12293         .addImm(PPC::PRED_NE)
12294         .addReg(PPC::CR0)
12295         .addMBB(loop1MBB);
12296     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12297     BB->addSuccessor(loop1MBB);
12298     BB->addSuccessor(exitMBB);
12299 
12300     BB = midMBB;
12301     BuildMI(BB, dl, TII->get(StoreMnemonic))
12302         .addReg(dest)
12303         .addReg(ptrA)
12304         .addReg(ptrB);
12305     BB->addSuccessor(exitMBB);
12306 
12307     //  exitMBB:
12308     //   ...
12309     BB = exitMBB;
12310   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12311              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12312     // We must use 64-bit registers for addresses when targeting 64-bit,
12313     // since we're actually doing arithmetic on them.  Other registers
12314     // can be 32-bit.
12315     bool is64bit = Subtarget.isPPC64();
12316     bool isLittleEndian = Subtarget.isLittleEndian();
12317     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12318 
12319     Register dest = MI.getOperand(0).getReg();
12320     Register ptrA = MI.getOperand(1).getReg();
12321     Register ptrB = MI.getOperand(2).getReg();
12322     Register oldval = MI.getOperand(3).getReg();
12323     Register newval = MI.getOperand(4).getReg();
12324     DebugLoc dl = MI.getDebugLoc();
12325 
12326     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12327     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12328     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12329     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12330     F->insert(It, loop1MBB);
12331     F->insert(It, loop2MBB);
12332     F->insert(It, midMBB);
12333     F->insert(It, exitMBB);
12334     exitMBB->splice(exitMBB->begin(), BB,
12335                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12336     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12337 
12338     MachineRegisterInfo &RegInfo = F->getRegInfo();
12339     const TargetRegisterClass *RC =
12340         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12341     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12342 
12343     Register PtrReg = RegInfo.createVirtualRegister(RC);
12344     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12345     Register ShiftReg =
12346         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12347     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12348     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12349     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12350     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12351     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12352     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12353     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12354     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12355     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12356     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12357     Register Ptr1Reg;
12358     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12359     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12360     //  thisMBB:
12361     //   ...
12362     //   fallthrough --> loopMBB
12363     BB->addSuccessor(loop1MBB);
12364 
12365     // The 4-byte load must be aligned, while a char or short may be
12366     // anywhere in the word.  Hence all this nasty bookkeeping code.
12367     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12368     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12369     //   xori shift, shift1, 24 [16]
12370     //   rlwinm ptr, ptr1, 0, 0, 29
12371     //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
12373     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12374     //   slw mask, mask2, shift
12375     //   and newval3, newval2, mask
12376     //   and oldval3, oldval2, mask
12377     // loop1MBB:
12378     //   lwarx tmpDest, ptr
12379     //   and tmp, tmpDest, mask
12380     //   cmpw tmp, oldval3
12381     //   bne- midMBB
12382     // loop2MBB:
12383     //   andc tmp2, tmpDest, mask
12384     //   or tmp4, tmp2, newval3
12385     //   stwcx. tmp4, ptr
12386     //   bne- loop1MBB
12387     //   b exitBB
12388     // midMBB:
12389     //   stwcx. tmpDest, ptr
12390     // exitBB:
12391     //   srw dest, tmpDest, shift
12392     if (ptrA != ZeroReg) {
12393       Ptr1Reg = RegInfo.createVirtualRegister(RC);
12394       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12395           .addReg(ptrA)
12396           .addReg(ptrB);
12397     } else {
12398       Ptr1Reg = ptrB;
12399     }
12400 
    // We need to use the 32-bit subregister to avoid a register class mismatch
    // in 64-bit mode.
12403     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12404         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12405         .addImm(3)
12406         .addImm(27)
12407         .addImm(is8bit ? 28 : 27);
12408     if (!isLittleEndian)
12409       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12410           .addReg(Shift1Reg)
12411           .addImm(is8bit ? 24 : 16);
12412     if (is64bit)
12413       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12414           .addReg(Ptr1Reg)
12415           .addImm(0)
12416           .addImm(61);
12417     else
12418       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12419           .addReg(Ptr1Reg)
12420           .addImm(0)
12421           .addImm(0)
12422           .addImm(29);
12423     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12424         .addReg(newval)
12425         .addReg(ShiftReg);
12426     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12427         .addReg(oldval)
12428         .addReg(ShiftReg);
12429     if (is8bit)
12430       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12431     else {
12432       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12433       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12434           .addReg(Mask3Reg)
12435           .addImm(65535);
12436     }
12437     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12438         .addReg(Mask2Reg)
12439         .addReg(ShiftReg);
12440     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12441         .addReg(NewVal2Reg)
12442         .addReg(MaskReg);
12443     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12444         .addReg(OldVal2Reg)
12445         .addReg(MaskReg);
12446 
12447     BB = loop1MBB;
12448     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12449         .addReg(ZeroReg)
12450         .addReg(PtrReg);
12451     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12452         .addReg(TmpDestReg)
12453         .addReg(MaskReg);
12454     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12455         .addReg(TmpReg)
12456         .addReg(OldVal3Reg);
12457     BuildMI(BB, dl, TII->get(PPC::BCC))
12458         .addImm(PPC::PRED_NE)
12459         .addReg(PPC::CR0)
12460         .addMBB(midMBB);
12461     BB->addSuccessor(loop2MBB);
12462     BB->addSuccessor(midMBB);
12463 
12464     BB = loop2MBB;
12465     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12466         .addReg(TmpDestReg)
12467         .addReg(MaskReg);
12468     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12469         .addReg(Tmp2Reg)
12470         .addReg(NewVal3Reg);
12471     BuildMI(BB, dl, TII->get(PPC::STWCX))
12472         .addReg(Tmp4Reg)
12473         .addReg(ZeroReg)
12474         .addReg(PtrReg);
12475     BuildMI(BB, dl, TII->get(PPC::BCC))
12476         .addImm(PPC::PRED_NE)
12477         .addReg(PPC::CR0)
12478         .addMBB(loop1MBB);
12479     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12480     BB->addSuccessor(loop1MBB);
12481     BB->addSuccessor(exitMBB);
12482 
12483     BB = midMBB;
12484     BuildMI(BB, dl, TII->get(PPC::STWCX))
12485         .addReg(TmpDestReg)
12486         .addReg(ZeroReg)
12487         .addReg(PtrReg);
12488     BB->addSuccessor(exitMBB);
12489 
12490     //  exitMBB:
12491     //   ...
12492     BB = exitMBB;
12493     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12494         .addReg(TmpReg)
12495         .addReg(ShiftReg);
12496   } else if (MI.getOpcode() == PPC::FADDrtz) {
12497     // This pseudo performs an FADD with rounding mode temporarily forced
12498     // to round-to-zero.  We emit this via custom inserter since the FPSCR
12499     // is not modeled at the SelectionDAG level.
12500     Register Dest = MI.getOperand(0).getReg();
12501     Register Src1 = MI.getOperand(1).getReg();
12502     Register Src2 = MI.getOperand(2).getReg();
12503     DebugLoc dl = MI.getDebugLoc();
12504 
12505     MachineRegisterInfo &RegInfo = F->getRegInfo();
12506     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12507 
12508     // Save FPSCR value.
12509     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12510 
12511     // Set rounding mode to round-to-zero.
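    // RN is FPSCR bits 62:63; mtfsb1 31 sets bit 63 and mtfsb0 30 clears bit
    // 62, selecting RN = 0b01 (round toward zero).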
12512     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12513         .addImm(31)
12514         .addReg(PPC::RM, RegState::ImplicitDefine);
12515 
12516     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12517         .addImm(30)
12518         .addReg(PPC::RM, RegState::ImplicitDefine);
12519 
12520     // Perform addition.
12521     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12522                    .addReg(Src1)
12523                    .addReg(Src2);
12524     if (MI.getFlag(MachineInstr::NoFPExcept))
12525       MIB.setMIFlag(MachineInstr::NoFPExcept);
12526 
12527     // Restore FPSCR value.
12528     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12529   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12530              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12531              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12532              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12533     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12534                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12535                           ? PPC::ANDI8_rec
12536                           : PPC::ANDI_rec;
12537     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12538                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12539 
12540     MachineRegisterInfo &RegInfo = F->getRegInfo();
12541     Register Dest = RegInfo.createVirtualRegister(
12542         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12543 
12544     DebugLoc Dl = MI.getDebugLoc();
12545     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12546         .addReg(MI.getOperand(1).getReg())
12547         .addImm(1);
12548     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12549             MI.getOperand(0).getReg())
12550         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12551   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12552     DebugLoc Dl = MI.getDebugLoc();
12553     MachineRegisterInfo &RegInfo = F->getRegInfo();
12554     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12555     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12556     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12557             MI.getOperand(0).getReg())
12558         .addReg(CRReg);
12559   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12560     DebugLoc Dl = MI.getDebugLoc();
12561     unsigned Imm = MI.getOperand(1).getImm();
12562     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12563     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12564             MI.getOperand(0).getReg())
12565         .addReg(PPC::CR0EQ);
12566   } else if (MI.getOpcode() == PPC::SETRNDi) {
12567     DebugLoc dl = MI.getDebugLoc();
12568     Register OldFPSCRReg = MI.getOperand(0).getReg();
12569 
12570     // Save FPSCR value.
12571     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12572 
    // The floating point rounding mode is in bits 62:63 of FPSCR, and has
12574     // the following settings:
12575     //   00 Round to nearest
12576     //   01 Round to 0
12577     //   10 Round to +inf
12578     //   11 Round to -inf
12579 
    // When the operand is an immediate, use its two least significant bits to
    // set bits 62:63 of FPSCR.
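    // For example, an immediate of 3 emits two mtfsb1 instructions, setting
    // both RN bits and selecting round to -inf.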
12582     unsigned Mode = MI.getOperand(1).getImm();
12583     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12584         .addImm(31)
12585         .addReg(PPC::RM, RegState::ImplicitDefine);
12586 
12587     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12588         .addImm(30)
12589         .addReg(PPC::RM, RegState::ImplicitDefine);
12590   } else if (MI.getOpcode() == PPC::SETRND) {
12591     DebugLoc dl = MI.getDebugLoc();
12592 
    // Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg,
    // or from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
    // If the target doesn't have DirectMove, we have to go through the stack,
    // because the target lacks instructions like mtvsrd or mfvsrd to move
    // between the two register classes directly.
12598     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12599       if (Subtarget.hasDirectMove()) {
12600         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12601           .addReg(SrcReg);
12602       } else {
12603         // Use stack to do the register copy.
12604         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12605         MachineRegisterInfo &RegInfo = F->getRegInfo();
12606         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12607         if (RC == &PPC::F8RCRegClass) {
          // Copy register from F8RCRegClass to G8RCRegClass.
12609           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12610                  "Unsupported RegClass.");
12611 
12612           StoreOp = PPC::STFD;
12613           LoadOp = PPC::LD;
12614         } else {
          // Copy register from G8RCRegClass to F8RCRegClass.
12616           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12617                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12618                  "Unsupported RegClass.");
12619         }
12620 
12621         MachineFrameInfo &MFI = F->getFrameInfo();
12622         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12623 
12624         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12625             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12626             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12627             MFI.getObjectAlign(FrameIdx));
12628 
12629         // Store the SrcReg into the stack.
12630         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12631           .addReg(SrcReg)
12632           .addImm(0)
12633           .addFrameIndex(FrameIdx)
12634           .addMemOperand(MMOStore);
12635 
12636         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12637             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12638             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12639             MFI.getObjectAlign(FrameIdx));
12640 
        // Load from the stack slot where SrcReg was stored into DestReg,
        // completing the register class conversion from the class of SrcReg
        // to the class of DestReg.
12644         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12645           .addImm(0)
12646           .addFrameIndex(FrameIdx)
12647           .addMemOperand(MMOLoad);
12648       }
12649     };
12650 
12651     Register OldFPSCRReg = MI.getOperand(0).getReg();
12652 
12653     // Save FPSCR value.
12654     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12655 
    // When the operand is a gprc register, use its two least significant bits
    // and the mtfsf instruction to set bits 62:63 of FPSCR.
12658     //
12659     // copy OldFPSCRTmpReg, OldFPSCRReg
12660     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12661     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12662     // copy NewFPSCRReg, NewFPSCRTmpReg
12663     // mtfsf 255, NewFPSCRReg
12664     MachineOperand SrcOp = MI.getOperand(1);
12665     MachineRegisterInfo &RegInfo = F->getRegInfo();
12666     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12667 
12668     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12669 
12670     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12671     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12672 
    // The first operand of INSERT_SUBREG should be a register that has
    // subregisters; we only care about its register class, so an
    // IMPLICIT_DEF register suffices.
12676     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12677     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12678       .addReg(ImDefReg)
12679       .add(SrcOp)
12680       .addImm(1);
12681 
12682     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
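    // With SH = 0 and MB = 62, rldimi inserts bits 62:63 of ExtSrcReg (the
    // new rounding mode) into OldFPSCRTmpReg while preserving every other bit.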
12683     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12684       .addReg(OldFPSCRTmpReg)
12685       .addReg(ExtSrcReg)
12686       .addImm(0)
12687       .addImm(62);
12688 
12689     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12690     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12691 
    // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into
    // bits 32:63 of FPSCR.
12694     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12695       .addImm(255)
12696       .addReg(NewFPSCRReg)
12697       .addImm(0)
12698       .addImm(0);
12699   } else if (MI.getOpcode() == PPC::SETFLM) {
12700     DebugLoc Dl = MI.getDebugLoc();
12701 
    // setflm returns the previous FPSCR content, so we need to save it first.
12703     Register OldFPSCRReg = MI.getOperand(0).getReg();
12704     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12705 
    // Put bits 32:63 of the new value into FPSCR.
12707     Register NewFPSCRReg = MI.getOperand(1).getReg();
12708     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12709         .addImm(255)
12710         .addReg(NewFPSCRReg)
12711         .addImm(0)
12712         .addImm(0);
12713   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12714              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12715     return emitProbedAlloca(MI, BB);
12716   } else {
12717     llvm_unreachable("Unexpected instr type to insert");
12718   }
12719 
12720   MI.eraseFromParent(); // The pseudo instruction is gone now.
12721   return BB;
12722 }
12723 
12724 //===----------------------------------------------------------------------===//
12725 // Target Optimization Hooks
12726 //===----------------------------------------------------------------------===//
12727 
12728 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12729   // For the estimates, convergence is quadratic, so we essentially double the
12730   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12731   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12732   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
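  // For example, starting from 2^-14 accuracy one step yields roughly 28
  // correct bits (enough for f32's 24-bit significand) and two steps roughly
  // 56 bits (enough for f64's 53-bit significand); from 2^-5 three and four
  // steps are needed, respectively.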
12733   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12734   if (VT.getScalarType() == MVT::f64)
12735     RefinementSteps++;
12736   return RefinementSteps;
12737 }
12738 
12739 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12740                                             const DenormalMode &Mode) const {
12741   // TODO - add support for v2f64/v4f32
12742   EVT VT = Op.getValueType();
12743   if (VT != MVT::f64)
12744     return SDValue();
12745 
12746   SDLoc DL(Op);
  // The output register of FTSQRT is a CR field.
12748   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12749   // ftsqrt BF,FRB
12750   // Let e_b be the unbiased exponent of the double-precision
12751   // floating-point operand in register FRB.
12752   // fe_flag is set to 1 if either of the following conditions occurs.
  //   - The double-precision floating-point operand in register FRB is a
  //     zero, a NaN, an infinity, or a negative value.
12755   //   - e_b is less than or equal to -970.
12756   // Otherwise fe_flag is set to 0.
  // Both the VSX and non-VSX versions set the EQ bit in the CR if the number
  // is not eligible for iteration (zero/negative/infinity/NaN or unbiased
  // exponent less than or equal to -970).
12760   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12761   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12762                                     FTSQRT, SRIdxVal),
12763                  0);
12764 }
12765 
12766 SDValue
12767 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12768                                                SelectionDAG &DAG) const {
12769   // TODO - add support for v2f64/v4f32
12770   EVT VT = Op.getValueType();
12771   if (VT != MVT::f64)
12772     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
12773 
12774   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12775 }
12776 
12777 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12778                                            int Enabled, int &RefinementSteps,
12779                                            bool &UseOneConstNR,
12780                                            bool Reciprocal) const {
12781   EVT VT = Operand.getValueType();
12782   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12783       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12784       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12785       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12786     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12787       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12788 
12789     // The Newton-Raphson computation with a single constant does not provide
12790     // enough accuracy on some CPUs.
12791     UseOneConstNR = !Subtarget.needsTwoConstNR();
12792     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12793   }
12794   return SDValue();
12795 }
12796 
12797 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12798                                             int Enabled,
12799                                             int &RefinementSteps) const {
12800   EVT VT = Operand.getValueType();
12801   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12802       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12803       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12804       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12805     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12806       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12807     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12808   }
12809   return SDValue();
12810 }
12811 
12812 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12813   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12814   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12815   // enabled for division), this functionality is redundant with the default
12816   // combiner logic (once the division -> reciprocal/multiply transformation
12817   // has taken place). As a result, this matters more for older cores than for
12818   // newer ones.
12819 
12820   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12821   // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
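  // For example, x/d, y/d and z/d on a generic core become r = 1.0/d followed
  // by x*r, y*r and z*r, whereas only two divisions by d are needed to
  // trigger the transformation on the embedded cores listed below.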
12823   switch (Subtarget.getCPUDirective()) {
12824   default:
12825     return 3;
12826   case PPC::DIR_440:
12827   case PPC::DIR_A2:
12828   case PPC::DIR_E500:
12829   case PPC::DIR_E500mc:
12830   case PPC::DIR_E5500:
12831     return 2;
12832   }
12833 }
12834 
12835 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12836 // collapsed, and so we need to look through chains of them.
12837 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12838                                      int64_t& Offset, SelectionDAG &DAG) {
12839   if (DAG.isBaseWithConstantOffset(Loc)) {
12840     Base = Loc.getOperand(0);
12841     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12842 
12843     // The base might itself be a base plus an offset, and if so, accumulate
12844     // that as well.
12845     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12846   }
12847 }
12848 
12849 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12850                             unsigned Bytes, int Dist,
12851                             SelectionDAG &DAG) {
12852   if (VT.getSizeInBits() / 8 != Bytes)
12853     return false;
12854 
12855   SDValue BaseLoc = Base->getBasePtr();
12856   if (Loc.getOpcode() == ISD::FrameIndex) {
12857     if (BaseLoc.getOpcode() != ISD::FrameIndex)
12858       return false;
12859     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12860     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
12861     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12862     int FS  = MFI.getObjectSize(FI);
12863     int BFS = MFI.getObjectSize(BFI);
12864     if (FS != BFS || FS != (int)Bytes) return false;
12865     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12866   }
12867 
12868   SDValue Base1 = Loc, Base2 = BaseLoc;
12869   int64_t Offset1 = 0, Offset2 = 0;
12870   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12871   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12872   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12873     return true;
12874 
12875   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12876   const GlobalValue *GV1 = nullptr;
12877   const GlobalValue *GV2 = nullptr;
12878   Offset1 = 0;
12879   Offset2 = 0;
12880   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12881   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12882   if (isGA1 && isGA2 && GV1 == GV2)
12883     return Offset1 == (Offset2 + Dist*Bytes);
12884   return false;
12885 }
12886 
12887 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12888 // not enforce equality of the chain operands.
12889 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12890                             unsigned Bytes, int Dist,
12891                             SelectionDAG &DAG) {
12892   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12893     EVT VT = LS->getMemoryVT();
12894     SDValue Loc = LS->getBasePtr();
12895     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12896   }
12897 
12898   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12899     EVT VT;
12900     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12901     default: return false;
12902     case Intrinsic::ppc_altivec_lvx:
12903     case Intrinsic::ppc_altivec_lvxl:
12904     case Intrinsic::ppc_vsx_lxvw4x:
12905     case Intrinsic::ppc_vsx_lxvw4x_be:
12906       VT = MVT::v4i32;
12907       break;
12908     case Intrinsic::ppc_vsx_lxvd2x:
12909     case Intrinsic::ppc_vsx_lxvd2x_be:
12910       VT = MVT::v2f64;
12911       break;
12912     case Intrinsic::ppc_altivec_lvebx:
12913       VT = MVT::i8;
12914       break;
12915     case Intrinsic::ppc_altivec_lvehx:
12916       VT = MVT::i16;
12917       break;
12918     case Intrinsic::ppc_altivec_lvewx:
12919       VT = MVT::i32;
12920       break;
12921     }
12922 
12923     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12924   }
12925 
12926   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12927     EVT VT;
12928     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12929     default: return false;
12930     case Intrinsic::ppc_altivec_stvx:
12931     case Intrinsic::ppc_altivec_stvxl:
12932     case Intrinsic::ppc_vsx_stxvw4x:
12933       VT = MVT::v4i32;
12934       break;
12935     case Intrinsic::ppc_vsx_stxvd2x:
12936       VT = MVT::v2f64;
12937       break;
12938     case Intrinsic::ppc_vsx_stxvw4x_be:
12939       VT = MVT::v4i32;
12940       break;
12941     case Intrinsic::ppc_vsx_stxvd2x_be:
12942       VT = MVT::v2f64;
12943       break;
12944     case Intrinsic::ppc_altivec_stvebx:
12945       VT = MVT::i8;
12946       break;
12947     case Intrinsic::ppc_altivec_stvehx:
12948       VT = MVT::i16;
12949       break;
12950     case Intrinsic::ppc_altivec_stvewx:
12951       VT = MVT::i32;
12952       break;
12953     }
12954 
12955     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12956   }
12957 
12958   return false;
12959 }
12960 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
12963 // token factors and other loads (but nothing else). As a result, a true result
12964 // indicates that it is safe to create a new consecutive load adjacent to the
12965 // load provided.
12966 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12967   SDValue Chain = LD->getChain();
12968   EVT VT = LD->getMemoryVT();
12969 
12970   SmallSet<SDNode *, 16> LoadRoots;
12971   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12972   SmallSet<SDNode *, 16> Visited;
12973 
12974   // First, search up the chain, branching to follow all token-factor operands.
12975   // If we find a consecutive load, then we're done, otherwise, record all
12976   // nodes just above the top-level loads and token factors.
12977   while (!Queue.empty()) {
12978     SDNode *ChainNext = Queue.pop_back_val();
12979     if (!Visited.insert(ChainNext).second)
12980       continue;
12981 
12982     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12983       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12984         return true;
12985 
12986       if (!Visited.count(ChainLD->getChain().getNode()))
12987         Queue.push_back(ChainLD->getChain().getNode());
12988     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12989       for (const SDUse &O : ChainNext->ops())
12990         if (!Visited.count(O.getNode()))
12991           Queue.push_back(O.getNode());
12992     } else
12993       LoadRoots.insert(ChainNext);
12994   }
12995 
12996   // Second, search down the chain, starting from the top-level nodes recorded
12997   // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
12999   // all loads (just the chain uses) and token factors to find a consecutive
13000   // load.
13001   Visited.clear();
13002   Queue.clear();
13003 
13004   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13005        IE = LoadRoots.end(); I != IE; ++I) {
13006     Queue.push_back(*I);
13007 
13008     while (!Queue.empty()) {
13009       SDNode *LoadRoot = Queue.pop_back_val();
13010       if (!Visited.insert(LoadRoot).second)
13011         continue;
13012 
13013       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13014         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13015           return true;
13016 
13017       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
13018            UE = LoadRoot->use_end(); UI != UE; ++UI)
13019         if (((isa<MemSDNode>(*UI) &&
13020             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
13021             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
13022           Queue.push_back(*UI);
13023     }
13024   }
13025 
13026   return false;
13027 }
13028 
13029 /// This function is called when we have proved that a SETCC node can be replaced
13030 /// by subtraction (and other supporting instructions) so that the result of
13031 /// comparison is kept in a GPR instead of CR. This function is purely for
13032 /// codegen purposes and has some flags to guide the codegen process.
13033 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13034                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13035   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13036 
  // Zero extend the operands to the largest legal integer. The original
  // operands must be of a strictly smaller size.
13039   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13040                          DAG.getConstant(Size, DL, MVT::i32));
13041   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13042                          DAG.getConstant(Size, DL, MVT::i32));
13043 
  // Swap the operands if needed, depending on the condition code.
13045   if (Swap)
13046     std::swap(Op0, Op1);
13047 
13048   // Subtract extended integers.
13049   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13050 
13051   // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the original comparison's result.
13053   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13054                              DAG.getConstant(Size - 1, DL, MVT::i32));
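  // Since both operands were zero extended from a type narrower than Size
  // bits, bit Size-1 of the difference is set exactly when Op0 < Op1
  // (unsigned); the shift moves that bit into the least significant position.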
13055   auto Final = Shifted;
13056 
  // Complement the result if needed, based on the condition code.
13058   if (Complement)
13059     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13060                         DAG.getConstant(1, DL, MVT::i64));
13061 
13062   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13063 }
13064 
13065 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13066                                                   DAGCombinerInfo &DCI) const {
13067   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13068 
13069   SelectionDAG &DAG = DCI.DAG;
13070   SDLoc DL(N);
13071 
  // The size of the integers being compared has a critical role in the
  // following analysis, so we prefer to do this when all types are legal.
13074   if (!DCI.isAfterLegalizeDAG())
13075     return SDValue();
13076 
  // If all users of the SETCC extend its value to a legal integer type, then
  // we replace the SETCC with a subtraction.
13079   for (SDNode::use_iterator UI = N->use_begin(),
13080        UE = N->use_end(); UI != UE; ++UI) {
13081     if (UI->getOpcode() != ISD::ZERO_EXTEND)
13082       return SDValue();
13083   }
13084 
13085   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13086   auto OpSize = N->getOperand(0).getValueSizeInBits();
13087 
13088   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13089 
13090   if (OpSize < Size) {
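    // Map each unsigned predicate onto generateEquivalentSub's (Complement,
    // Swap) flags: ULT is the sign bit of Op0 - Op1, UGT swaps the operands,
    // and ULE/UGE are the complements of UGT/ULT respectively.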
13091     switch (CC) {
13092     default: break;
13093     case ISD::SETULT:
13094       return generateEquivalentSub(N, Size, false, false, DL, DAG);
13095     case ISD::SETULE:
13096       return generateEquivalentSub(N, Size, true, true, DL, DAG);
13097     case ISD::SETUGT:
13098       return generateEquivalentSub(N, Size, false, true, DL, DAG);
13099     case ISD::SETUGE:
13100       return generateEquivalentSub(N, Size, true, false, DL, DAG);
13101     }
13102   }
13103 
13104   return SDValue();
13105 }
13106 
13107 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13108                                                   DAGCombinerInfo &DCI) const {
13109   SelectionDAG &DAG = DCI.DAG;
13110   SDLoc dl(N);
13111 
13112   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13113   // If we're tracking CR bits, we need to be careful that we don't have:
13114   //   trunc(binary-ops(zext(x), zext(y)))
13115   // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
13117   // such that we're unnecessarily moving things into GPRs when it would be
13118   // better to keep them in CR bits.
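  // For example, (trunc (and (zext i1 %a), (zext i1 %b)) to i1) can simply be
  // rewritten as (and i1 %a, %b).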
13119 
13120   // Note that trunc here can be an actual i1 trunc, or can be the effective
13121   // truncation that comes from a setcc or select_cc.
13122   if (N->getOpcode() == ISD::TRUNCATE &&
13123       N->getValueType(0) != MVT::i1)
13124     return SDValue();
13125 
13126   if (N->getOperand(0).getValueType() != MVT::i32 &&
13127       N->getOperand(0).getValueType() != MVT::i64)
13128     return SDValue();
13129 
13130   if (N->getOpcode() == ISD::SETCC ||
13131       N->getOpcode() == ISD::SELECT_CC) {
13132     // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
13134     ISD::CondCode CC =
13135       cast<CondCodeSDNode>(N->getOperand(
13136         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13137     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13138 
13139     if (ISD::isSignedIntSetCC(CC)) {
13140       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13141           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13142         return SDValue();
13143     } else if (ISD::isUnsignedIntSetCC(CC)) {
13144       if (!DAG.MaskedValueIsZero(N->getOperand(0),
13145                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13146           !DAG.MaskedValueIsZero(N->getOperand(1),
13147                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
13148         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13149                                              : SDValue());
13150     } else {
      // This is neither a signed nor an unsigned comparison; just make sure
      // that the high bits are equal.
13153       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13154       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13155 
13156       // We don't really care about what is known about the first bit (if
13157       // anything), so clear it in all masks prior to comparing them.
13158       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13159       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13160 
13161       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13162         return SDValue();
13163     }
13164   }
13165 
  // We now know that the higher-order bits are irrelevant; we just need to
  // make sure that all of the intermediate operations are bit operations and
  // that all inputs are extensions.
13169   if (N->getOperand(0).getOpcode() != ISD::AND &&
13170       N->getOperand(0).getOpcode() != ISD::OR  &&
13171       N->getOperand(0).getOpcode() != ISD::XOR &&
13172       N->getOperand(0).getOpcode() != ISD::SELECT &&
13173       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13174       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13175       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13176       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13177       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13178     return SDValue();
13179 
13180   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13181       N->getOperand(1).getOpcode() != ISD::AND &&
13182       N->getOperand(1).getOpcode() != ISD::OR  &&
13183       N->getOperand(1).getOpcode() != ISD::XOR &&
13184       N->getOperand(1).getOpcode() != ISD::SELECT &&
13185       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13186       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13187       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13188       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13189       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13190     return SDValue();
13191 
13192   SmallVector<SDValue, 4> Inputs;
13193   SmallVector<SDValue, 8> BinOps, PromOps;
13194   SmallPtrSet<SDNode *, 16> Visited;
13195 
13196   for (unsigned i = 0; i < 2; ++i) {
13197     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13198           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13199           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13200           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13201         isa<ConstantSDNode>(N->getOperand(i)))
13202       Inputs.push_back(N->getOperand(i));
13203     else
13204       BinOps.push_back(N->getOperand(i));
13205 
13206     if (N->getOpcode() == ISD::TRUNCATE)
13207       break;
13208   }
13209 
13210   // Visit all inputs, collect all binary operations (and, or, xor and
13211   // select) that are all fed by extensions.
13212   while (!BinOps.empty()) {
13213     SDValue BinOp = BinOps.back();
13214     BinOps.pop_back();
13215 
13216     if (!Visited.insert(BinOp.getNode()).second)
13217       continue;
13218 
13219     PromOps.push_back(BinOp);
13220 
13221     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13222       // The condition of the select is not promoted.
13223       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13224         continue;
13225       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13226         continue;
13227 
13228       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13229             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13230             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13231            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13232           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13233         Inputs.push_back(BinOp.getOperand(i));
13234       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13235                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13236                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13237                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13238                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13239                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13240                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13241                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13242                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13243         BinOps.push_back(BinOp.getOperand(i));
13244       } else {
13245         // We have an input that is not an extension or another binary
13246         // operation; we'll abort this transformation.
13247         return SDValue();
13248       }
13249     }
13250   }
13251 
13252   // Make sure that this is a self-contained cluster of operations (which
13253   // is not quite the same thing as saying that everything has only one
13254   // use).
13255   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13256     if (isa<ConstantSDNode>(Inputs[i]))
13257       continue;
13258 
13259     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13260                               UE = Inputs[i].getNode()->use_end();
13261          UI != UE; ++UI) {
13262       SDNode *User = *UI;
13263       if (User != N && !Visited.count(User))
13264         return SDValue();
13265 
      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
13268       // FIXME: Although we could sometimes handle this, and it does occur in
13269       // practice that one of the condition inputs to the select is also one of
13270       // the outputs, we currently can't deal with this.
13271       if (User->getOpcode() == ISD::SELECT) {
13272         if (User->getOperand(0) == Inputs[i])
13273           return SDValue();
13274       } else if (User->getOpcode() == ISD::SELECT_CC) {
13275         if (User->getOperand(0) == Inputs[i] ||
13276             User->getOperand(1) == Inputs[i])
13277           return SDValue();
13278       }
13279     }
13280   }
13281 
13282   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13283     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13284                               UE = PromOps[i].getNode()->use_end();
13285          UI != UE; ++UI) {
13286       SDNode *User = *UI;
13287       if (User != N && !Visited.count(User))
13288         return SDValue();
13289 
      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
13292       // FIXME: Although we could sometimes handle this, and it does occur in
13293       // practice that one of the condition inputs to the select is also one of
13294       // the outputs, we currently can't deal with this.
13295       if (User->getOpcode() == ISD::SELECT) {
13296         if (User->getOperand(0) == PromOps[i])
13297           return SDValue();
13298       } else if (User->getOpcode() == ISD::SELECT_CC) {
13299         if (User->getOperand(0) == PromOps[i] ||
13300             User->getOperand(1) == PromOps[i])
13301           return SDValue();
13302       }
13303     }
13304   }
13305 
13306   // Replace all inputs with the extension operand.
13307   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13308     // Constants may have users outside the cluster of to-be-promoted nodes,
13309     // and so we need to replace those as we do the promotions.
13310     if (isa<ConstantSDNode>(Inputs[i]))
13311       continue;
13312     else
13313       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13314   }
13315 
13316   std::list<HandleSDNode> PromOpHandles;
13317   for (auto &PromOp : PromOps)
13318     PromOpHandles.emplace_back(PromOp);
13319 
13320   // Replace all operations (these are all the same, but have a different
13321   // (i1) return type). DAG.getNode will validate that the types of
13322   // a binary operator match, so go through the list in reverse so that
13323   // we've likely promoted both operands first. Any intermediate truncations or
13324   // extensions disappear.
13325   while (!PromOpHandles.empty()) {
13326     SDValue PromOp = PromOpHandles.back().getValue();
13327     PromOpHandles.pop_back();
13328 
13329     if (PromOp.getOpcode() == ISD::TRUNCATE ||
13330         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13331         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13332         PromOp.getOpcode() == ISD::ANY_EXTEND) {
13333       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13334           PromOp.getOperand(0).getValueType() != MVT::i1) {
13335         // The operand is not yet ready (see comment below).
13336         PromOpHandles.emplace_front(PromOp);
13337         continue;
13338       }
13339 
13340       SDValue RepValue = PromOp.getOperand(0);
13341       if (isa<ConstantSDNode>(RepValue))
13342         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13343 
13344       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13345       continue;
13346     }
13347 
13348     unsigned C;
13349     switch (PromOp.getOpcode()) {
13350     default:             C = 0; break;
13351     case ISD::SELECT:    C = 1; break;
13352     case ISD::SELECT_CC: C = 2; break;
13353     }
13354 
13355     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13356          PromOp.getOperand(C).getValueType() != MVT::i1) ||
13357         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13358          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13359       // The to-be-promoted operands of this node have not yet been
13360       // promoted (this should be rare because we're going through the
13361       // list backward, but if one of the operands has several users in
13362       // this cluster of to-be-promoted nodes, it is possible).
13363       PromOpHandles.emplace_front(PromOp);
13364       continue;
13365     }
13366 
13367     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13368                                 PromOp.getNode()->op_end());
13369 
13370     // If there are any constant inputs, make sure they're replaced now.
13371     for (unsigned i = 0; i < 2; ++i)
13372       if (isa<ConstantSDNode>(Ops[C+i]))
13373         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13374 
13375     DAG.ReplaceAllUsesOfValueWith(PromOp,
13376       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13377   }
13378 
13379   // Now we're left with the initial truncation itself.
13380   if (N->getOpcode() == ISD::TRUNCATE)
13381     return N->getOperand(0);
13382 
13383   // Otherwise, this is a comparison. The operands to be compared have just
13384   // changed type (to i1), but everything else is the same.
13385   return SDValue(N, 0);
13386 }
13387 
13388 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13389                                                   DAGCombinerInfo &DCI) const {
13390   SelectionDAG &DAG = DCI.DAG;
13391   SDLoc dl(N);
13392 
13393   // If we're tracking CR bits, we need to be careful that we don't have:
13394   //   zext(binary-ops(trunc(x), trunc(y)))
13395   // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
13397   // such that we're unnecessarily moving things into CR bits that can more
13398   // efficiently stay in GPRs. Note that if we're not certain that the high
13399   // bits are set as required by the final extension, we still may need to do
13400   // some masking to get the proper behavior.
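  // For example, (zext (and (trunc %a to i1), (trunc %b to i1)) to i32) can
  // be computed directly in GPRs as an i32 AND of %a and %b, with masking
  // added only if the final extension actually requires it.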
13401 
13402   // This same functionality is important on PPC64 when dealing with
13403   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13404   // the return values of functions. Because it is so similar, it is handled
13405   // here as well.
13406 
13407   if (N->getValueType(0) != MVT::i32 &&
13408       N->getValueType(0) != MVT::i64)
13409     return SDValue();
13410 
13411   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13412         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13413     return SDValue();
13414 
13415   if (N->getOperand(0).getOpcode() != ISD::AND &&
13416       N->getOperand(0).getOpcode() != ISD::OR  &&
13417       N->getOperand(0).getOpcode() != ISD::XOR &&
13418       N->getOperand(0).getOpcode() != ISD::SELECT &&
13419       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13420     return SDValue();
13421 
13422   SmallVector<SDValue, 4> Inputs;
13423   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13424   SmallPtrSet<SDNode *, 16> Visited;
13425 
13426   // Visit all inputs, collect all binary operations (and, or, xor and
13427   // select) that are all fed by truncations.
13428   while (!BinOps.empty()) {
13429     SDValue BinOp = BinOps.back();
13430     BinOps.pop_back();
13431 
13432     if (!Visited.insert(BinOp.getNode()).second)
13433       continue;
13434 
13435     PromOps.push_back(BinOp);
13436 
13437     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13438       // The condition of the select is not promoted.
13439       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13440         continue;
13441       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13442         continue;
13443 
13444       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13445           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13446         Inputs.push_back(BinOp.getOperand(i));
13447       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13448                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13449                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13450                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13451                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13452         BinOps.push_back(BinOp.getOperand(i));
13453       } else {
13454         // We have an input that is not a truncation or another binary
13455         // operation; we'll abort this transformation.
13456         return SDValue();
13457       }
13458     }
13459   }
13460 
13461   // The operands of a select that must be truncated when the select is
13462   // promoted because the operand is actually part of the to-be-promoted set.
13463   DenseMap<SDNode *, EVT> SelectTruncOp[2];
13464 
13465   // Make sure that this is a self-contained cluster of operations (which
13466   // is not quite the same thing as saying that everything has only one
13467   // use).
13468   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13469     if (isa<ConstantSDNode>(Inputs[i]))
13470       continue;
13471 
13472     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13473                               UE = Inputs[i].getNode()->use_end();
13474          UI != UE; ++UI) {
13475       SDNode *User = *UI;
13476       if (User != N && !Visited.count(User))
13477         return SDValue();
13478 
      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
13481       if (User->getOpcode() == ISD::SELECT) {
13482         if (User->getOperand(0) == Inputs[i])
13483           SelectTruncOp[0].insert(std::make_pair(User,
13484                                     User->getOperand(0).getValueType()));
13485       } else if (User->getOpcode() == ISD::SELECT_CC) {
13486         if (User->getOperand(0) == Inputs[i])
13487           SelectTruncOp[0].insert(std::make_pair(User,
13488                                     User->getOperand(0).getValueType()));
13489         if (User->getOperand(1) == Inputs[i])
13490           SelectTruncOp[1].insert(std::make_pair(User,
13491                                     User->getOperand(1).getValueType()));
13492       }
13493     }
13494   }
13495 
13496   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13497     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13498                               UE = PromOps[i].getNode()->use_end();
13499          UI != UE; ++UI) {
13500       SDNode *User = *UI;
13501       if (User != N && !Visited.count(User))
13502         return SDValue();
13503 
      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
13506       if (User->getOpcode() == ISD::SELECT) {
13507         if (User->getOperand(0) == PromOps[i])
13508           SelectTruncOp[0].insert(std::make_pair(User,
13509                                     User->getOperand(0).getValueType()));
13510       } else if (User->getOpcode() == ISD::SELECT_CC) {
13511         if (User->getOperand(0) == PromOps[i])
13512           SelectTruncOp[0].insert(std::make_pair(User,
13513                                     User->getOperand(0).getValueType()));
13514         if (User->getOperand(1) == PromOps[i])
13515           SelectTruncOp[1].insert(std::make_pair(User,
13516                                     User->getOperand(1).getValueType()));
13517       }
13518     }
13519   }
13520 
13521   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13522   bool ReallyNeedsExt = false;
13523   if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If not all of the inputs are already sign/zero extended, then we'll
    // still need to do that at the end.
13526     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13527       if (isa<ConstantSDNode>(Inputs[i]))
13528         continue;
13529 
13530       unsigned OpBits =
13531         Inputs[i].getOperand(0).getValueSizeInBits();
13532       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13533 
13534       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13535            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13536                                   APInt::getHighBitsSet(OpBits,
13537                                                         OpBits-PromBits))) ||
13538           (N->getOpcode() == ISD::SIGN_EXTEND &&
13539            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13540              (OpBits-(PromBits-1)))) {
13541         ReallyNeedsExt = true;
13542         break;
13543       }
13544     }
13545   }
13546 
13547   // Replace all inputs, either with the truncation operand, or a
13548   // truncation or extension to the final output type.
13549   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13550     // Constant inputs need to be replaced with the to-be-promoted nodes that
13551     // use them because they might have users outside of the cluster of
13552     // promoted nodes.
13553     if (isa<ConstantSDNode>(Inputs[i]))
13554       continue;
13555 
13556     SDValue InSrc = Inputs[i].getOperand(0);
13557     if (Inputs[i].getValueType() == N->getValueType(0))
13558       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13559     else if (N->getOpcode() == ISD::SIGN_EXTEND)
13560       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13561         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13562     else if (N->getOpcode() == ISD::ZERO_EXTEND)
13563       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13564         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13565     else
13566       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13567         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13568   }
13569 
13570   std::list<HandleSDNode> PromOpHandles;
13571   for (auto &PromOp : PromOps)
13572     PromOpHandles.emplace_back(PromOp);
13573 
13574   // Replace all operations (these are all the same, but have a different
13575   // (promoted) return type). DAG.getNode will validate that the types of
13576   // a binary operator match, so go through the list in reverse so that
13577   // we've likely promoted both operands first.
13578   while (!PromOpHandles.empty()) {
13579     SDValue PromOp = PromOpHandles.back().getValue();
13580     PromOpHandles.pop_back();
13581 
13582     unsigned C;
13583     switch (PromOp.getOpcode()) {
13584     default:             C = 0; break;
13585     case ISD::SELECT:    C = 1; break;
13586     case ISD::SELECT_CC: C = 2; break;
13587     }
13588 
13589     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13590          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13591         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13592          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13593       // The to-be-promoted operands of this node have not yet been
13594       // promoted (this should be rare because we're going through the
13595       // list backward, but if one of the operands has several users in
13596       // this cluster of to-be-promoted nodes, it is possible).
13597       PromOpHandles.emplace_front(PromOp);
13598       continue;
13599     }
13600 
13601     // For SELECT and SELECT_CC nodes, we do a similar check for any
13602     // to-be-promoted comparison inputs.
13603     if (PromOp.getOpcode() == ISD::SELECT ||
13604         PromOp.getOpcode() == ISD::SELECT_CC) {
13605       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13606            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13607           (SelectTruncOp[1].count(PromOp.getNode()) &&
13608            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13609         PromOpHandles.emplace_front(PromOp);
13610         continue;
13611       }
13612     }
13613 
13614     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13615                                 PromOp.getNode()->op_end());
13616 
13617     // If this node has constant inputs, then they'll need to be promoted here.
13618     for (unsigned i = 0; i < 2; ++i) {
13619       if (!isa<ConstantSDNode>(Ops[C+i]))
13620         continue;
13621       if (Ops[C+i].getValueType() == N->getValueType(0))
13622         continue;
13623 
13624       if (N->getOpcode() == ISD::SIGN_EXTEND)
13625         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13626       else if (N->getOpcode() == ISD::ZERO_EXTEND)
13627         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13628       else
13629         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13630     }
13631 
13632     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13633     // truncate them again to the original value type.
13634     if (PromOp.getOpcode() == ISD::SELECT ||
13635         PromOp.getOpcode() == ISD::SELECT_CC) {
13636       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13637       if (SI0 != SelectTruncOp[0].end())
13638         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13639       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13640       if (SI1 != SelectTruncOp[1].end())
13641         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13642     }
13643 
13644     DAG.ReplaceAllUsesOfValueWith(PromOp,
13645       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13646   }
13647 
13648   // Now we're left with the initial extension itself.
13649   if (!ReallyNeedsExt)
13650     return N->getOperand(0);
13651 
13652   // To zero extend, just mask off everything except for the first bit (in the
13653   // i1 case).
13654   if (N->getOpcode() == ISD::ZERO_EXTEND)
13655     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13656                        DAG.getConstant(APInt::getLowBitsSet(
13657                                          N->getValueSizeInBits(0), PromBits),
13658                                        dl, N->getValueType(0)));
13659 
13660   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13661          "Invalid extension type");
13662   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13663   SDValue ShiftCst =
13664       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13665   return DAG.getNode(
13666       ISD::SRA, dl, N->getValueType(0),
13667       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13668       ShiftCst);
13669 }
13670 
13671 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13672                                         DAGCombinerInfo &DCI) const {
13673   assert(N->getOpcode() == ISD::SETCC &&
13674          "Should be called with a SETCC node");
13675 
13676   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13677   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13678     SDValue LHS = N->getOperand(0);
13679     SDValue RHS = N->getOperand(1);
13680 
13681     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13682     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13683         LHS.hasOneUse())
13684       std::swap(LHS, RHS);
13685 
13686     // x == 0-y --> x+y == 0
13687     // x != 0-y --> x+y != 0
13688     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13689         RHS.hasOneUse()) {
13690       SDLoc DL(N);
13691       SelectionDAG &DAG = DCI.DAG;
13692       EVT VT = N->getValueType(0);
13693       EVT OpVT = LHS.getValueType();
13694       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13695       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13696     }
13697   }
13698 
13699   return DAGCombineTruncBoolExt(N, DCI);
13700 }
13701 
13702 // Is this an extending load from an f32 to an f64?
13703 static bool isFPExtLoad(SDValue Op) {
13704   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13705     return LD->getExtensionType() == ISD::EXTLOAD &&
13706       Op.getValueType() == MVT::f64;
13707   return false;
13708 }
13709 
/// Reduces the number of fp-to-int conversions when building a vector.
13711 ///
13712 /// If this vector is built out of floating to integer conversions,
13713 /// transform it to a vector built out of floating point values followed by a
13714 /// single floating to integer conversion of the vector.
13715 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
13716 /// becomes (fptosi (build_vector ($A, $B, ...)))
13717 SDValue PPCTargetLowering::
13718 combineElementTruncationToVectorTruncation(SDNode *N,
13719                                            DAGCombinerInfo &DCI) const {
13720   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13721          "Should be called with a BUILD_VECTOR node");
13722 
13723   SelectionDAG &DAG = DCI.DAG;
13724   SDLoc dl(N);
13725 
13726   SDValue FirstInput = N->getOperand(0);
13727   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13728          "The input operand must be an fp-to-int conversion.");
13729 
  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCISD nodes.
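  // Concretely, each operand here looks roughly like
  // (PPCISD::MFVSR (PPCISD::FCTI[DW][U]Z $A)) rather than a plain
  // (fp_to_[su]int $A).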
13732   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13733   if (FirstConversion == PPCISD::FCTIDZ ||
13734       FirstConversion == PPCISD::FCTIDUZ ||
13735       FirstConversion == PPCISD::FCTIWZ ||
13736       FirstConversion == PPCISD::FCTIWUZ) {
13737     bool IsSplat = true;
13738     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13739       FirstConversion == PPCISD::FCTIWUZ;
13740     EVT SrcVT = FirstInput.getOperand(0).getValueType();
13741     SmallVector<SDValue, 4> Ops;
13742     EVT TargetVT = N->getValueType(0);
13743     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13744       SDValue NextOp = N->getOperand(i);
13745       if (NextOp.getOpcode() != PPCISD::MFVSR)
13746         return SDValue();
13747       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13748       if (NextConversion != FirstConversion)
13749         return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load, in which
      // case doing this combine will allow us to combine consecutive loads.
13754       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13755         return SDValue();
13756       if (N->getOperand(i) != FirstInput)
13757         IsSplat = false;
13758     }
13759 
13760     // If this is a splat, we leave it as-is since there will be only a single
13761     // fp-to-int conversion followed by a splat of the integer. This is better
13762     // for 32-bit and smaller ints and neutral for 64-bit ints.
13763     if (IsSplat)
13764       return SDValue();
13765 
    // Now that we know we have the right type of node, get its operands.
13767     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13768       SDValue In = N->getOperand(i).getOperand(0);
13769       if (Is32Bit) {
13770         // For 32-bit values, we need to add an FP_ROUND node (if we made it
13771         // here, we know that all inputs are extending loads so this is safe).
13772         if (In.isUndef())
13773           Ops.push_back(DAG.getUNDEF(SrcVT));
13774         else {
13775           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13776                                       MVT::f32, In.getOperand(0),
13777                                       DAG.getIntPtrConstant(1, dl));
13778           Ops.push_back(Trunc);
13779         }
13780       } else
13781         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13782     }
13783 
13784     unsigned Opcode;
13785     if (FirstConversion == PPCISD::FCTIDZ ||
13786         FirstConversion == PPCISD::FCTIWZ)
13787       Opcode = ISD::FP_TO_SINT;
13788     else
13789       Opcode = ISD::FP_TO_UINT;
13790 
13791     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13792     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13793     return DAG.getNode(Opcode, dl, TargetVT, BV);
13794   }
13795   return SDValue();
13796 }
13797 
13798 /// Reduce the number of loads when building a vector.
13799 ///
13800 /// Building a vector out of multiple loads can be converted to a load
13801 /// of the vector type if the loads are consecutive. If the loads are
13802 /// consecutive but in descending order, a shuffle is added at the end
13803 /// to reorder the vector.
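///
/// For example (4 x i32 elements, loads at increasing addresses):
///   (build_vector (load p), (load p+4), (load p+8), (load p+12))
/// becomes a single v4i32 load of p. If the element loads instead appear in
/// decreasing address order, the wide load is taken from the lowest address
/// (the last operand) and a reversing vector_shuffle is appended.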
13804 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13805   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13806          "Should be called with a BUILD_VECTOR node");
13807 
13808   SDLoc dl(N);
13809 
  // Return early for non-byte-sized types, as they can't be consecutive.
13811   if (!N->getValueType(0).getVectorElementType().isByteSized())
13812     return SDValue();
13813 
13814   bool InputsAreConsecutiveLoads = true;
13815   bool InputsAreReverseConsecutive = true;
13816   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13817   SDValue FirstInput = N->getOperand(0);
13818   bool IsRoundOfExtLoad = false;
13819 
13820   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13821       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13822     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13823     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13824   }
13825   // Not a build vector of (possibly fp_rounded) loads.
13826   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13827       N->getNumOperands() == 1)
13828     return SDValue();
13829 
13830   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13831     // If any inputs are fp_round(extload), they all must be.
13832     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13833       return SDValue();
13834 
13835     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13836       N->getOperand(i);
13837     if (NextInput.getOpcode() != ISD::LOAD)
13838       return SDValue();
13839 
13840     SDValue PreviousInput =
13841       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13842     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13843     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13844 
13845     // If any inputs are fp_round(extload), they all must be.
13846     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13847       return SDValue();
13848 
13849     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13850       InputsAreConsecutiveLoads = false;
13851     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13852       InputsAreReverseConsecutive = false;
13853 
13854     // Exit early if the loads are neither consecutive nor reverse consecutive.
13855     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13856       return SDValue();
13857   }
13858 
13859   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13860          "The loads cannot be both consecutive and reverse consecutive.");
13861 
13862   SDValue FirstLoadOp =
13863     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13864   SDValue LastLoadOp =
13865     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13866                        N->getOperand(N->getNumOperands()-1);
13867 
13868   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13869   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13870   if (InputsAreConsecutiveLoads) {
13871     assert(LD1 && "Input needs to be a LoadSDNode.");
13872     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13873                        LD1->getBasePtr(), LD1->getPointerInfo(),
13874                        LD1->getAlignment());
13875   }
13876   if (InputsAreReverseConsecutive) {
13877     assert(LDL && "Input needs to be a LoadSDNode.");
13878     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13879                                LDL->getBasePtr(), LDL->getPointerInfo(),
13880                                LDL->getAlignment());
13881     SmallVector<int, 16> Ops;
13882     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13883       Ops.push_back(i);
13884 
13885     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13886                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13887   }
13888   return SDValue();
13889 }
13890 
13891 // This function adds the required vector_shuffle needed to get
13892 // the elements of the vector extract in the correct position
13893 // as specified by the CorrectElems encoding.
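// For example, for a little endian byte->word extend the allowed source
// indices are 0x0,0x4,0x8,0xC, encoded one per byte in CorrectElems. Each
// loop iteration below places the actual extracted index (the low nibble of
// Elems) at the mask position given by the low nibble of CorrectElems, then
// shifts both encodings right by 8 bits to move on to the next element.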
13894 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13895                                       SDValue Input, uint64_t Elems,
13896                                       uint64_t CorrectElems) {
13897   SDLoc dl(N);
13898 
13899   unsigned NumElems = Input.getValueType().getVectorNumElements();
13900   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13901 
  // Knowing the element indices being extracted from the original
  // vector and the order in which they're being inserted, just put
  // them at the element indices required by the instruction.
13905   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13906     if (DAG.getDataLayout().isLittleEndian())
13907       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13908     else
13909       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13910     CorrectElems = CorrectElems >> 8;
13911     Elems = Elems >> 8;
13912   }
13913 
13914   SDValue Shuffle =
13915       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13916                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13917 
13918   EVT VT = N->getValueType(0);
13919   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13920 
13921   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13922                                Input.getValueType().getVectorElementType(),
13923                                VT.getVectorNumElements());
13924   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13925                      DAG.getValueType(ExtVT));
13926 }
13927 
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create a
// SIGN_EXTEND_INREG node, which selects the vector sign-extend instructions
// during instruction selection.
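// For example, on little endian a v4i32 result built as
//   (build_vector (sext (extractelt %v, 0)),  (sext (extractelt %v, 4)),
//                 (sext (extractelt %v, 8)),  (sext (extractelt %v, 12)))
// (with %v of type v16i8) already uses the indices the byte->word extend
// instruction expects, so no shuffle is needed; any other index combination
// first gets a vector_shuffle to move the elements into place.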
13933 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13934   // This array encodes the indices that the vector sign extend instructions
13935   // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
13938   // For example: 0x3074B8FC  byte->word
13939   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13940   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13941   // For example: 0x000070F8  byte->double word
13942   // For LE: the allowed indices are: 0x0,0x8
13943   // For BE: the allowed indices are: 0x7,0xF
13944   uint64_t TargetElems[] = {
13945       0x3074B8FC, // b->w
13946       0x000070F8, // b->d
13947       0x10325476, // h->w
13948       0x00003074, // h->d
13949       0x00001032, // w->d
13950   };
13951 
13952   uint64_t Elems = 0;
13953   int Index;
13954   SDValue Input;
13955 
13956   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13957     if (!Op)
13958       return false;
13959     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13960         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13961       return false;
13962 
13963     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13964     // of the right width.
13965     SDValue Extract = Op.getOperand(0);
13966     if (Extract.getOpcode() == ISD::ANY_EXTEND)
13967       Extract = Extract.getOperand(0);
13968     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13969       return false;
13970 
13971     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13972     if (!ExtOp)
13973       return false;
13974 
13975     Index = ExtOp->getZExtValue();
13976     if (Input && Input != Extract.getOperand(0))
13977       return false;
13978 
13979     if (!Input)
13980       Input = Extract.getOperand(0);
13981 
13982     Elems = Elems << 8;
13983     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13984     Elems |= Index;
13985 
13986     return true;
13987   };
13988 
  // If the build vector operands aren't sign-extended vector extracts
  // of the same input vector, then return.
13991   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13992     if (!isSExtOfVecExtract(N->getOperand(i))) {
13993       return SDValue();
13994     }
13995   }
13996 
  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
13999   int TgtElemArrayIdx;
14000   int InputSize = Input.getValueType().getScalarSizeInBits();
14001   int OutputSize = N->getValueType(0).getScalarSizeInBits();
14002   if (InputSize + OutputSize == 40)
14003     TgtElemArrayIdx = 0;
14004   else if (InputSize + OutputSize == 72)
14005     TgtElemArrayIdx = 1;
14006   else if (InputSize + OutputSize == 48)
14007     TgtElemArrayIdx = 2;
14008   else if (InputSize + OutputSize == 80)
14009     TgtElemArrayIdx = 3;
14010   else if (InputSize + OutputSize == 96)
14011     TgtElemArrayIdx = 4;
14012   else
14013     return SDValue();
14014 
14015   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14016   CorrectElems = DAG.getDataLayout().isLittleEndian()
14017                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14018                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14019   if (Elems != CorrectElems) {
14020     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14021   }
14022 
14023   // Regular lowering will catch cases where a shuffle is not needed.
14024   return SDValue();
14025 }
14026 
14027 // Look for the pattern of a load from a narrow width to i128, feeding
14028 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14029 // (LXVRZX). This node represents a zero extending load that will be matched
14030 // to the Load VSX Vector Rightmost instructions.
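// For example:
//   (v1i128 (build_vector (i128 (zextload i64 from <ptr>))))
// becomes an LXVRZX memory intrinsic node that performs the zero-extending
// 64-bit load directly into the vector register.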
14031 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14032   SDLoc DL(N);
14033 
14034   // This combine is only eligible for a BUILD_VECTOR of v1i128.
14035   if (N->getValueType(0) != MVT::v1i128)
14036     return SDValue();
14037 
14038   SDValue Operand = N->getOperand(0);
14039   // Proceed with the transformation if the operand to the BUILD_VECTOR
14040   // is a load instruction.
14041   if (Operand.getOpcode() != ISD::LOAD)
14042     return SDValue();
14043 
14044   LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
14045   EVT MemoryType = LD->getMemoryVT();
14046 
  // This transformation is only valid if we are loading either a byte,
  // halfword, word, or doubleword.
14049   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14050                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
14051 
14052   // Ensure that the load from the narrow width is being zero extended to i128.
14053   if (!ValidLDType ||
14054       (LD->getExtensionType() != ISD::ZEXTLOAD &&
14055        LD->getExtensionType() != ISD::EXTLOAD))
14056     return SDValue();
14057 
14058   SDValue LoadOps[] = {
14059       LD->getChain(), LD->getBasePtr(),
14060       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14061 
14062   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14063                                  DAG.getVTList(MVT::v1i128, MVT::Other),
14064                                  LoadOps, MemoryType, LD->getMemOperand());
14065 }
14066 
14067 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14068                                                  DAGCombinerInfo &DCI) const {
14069   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14070          "Should be called with a BUILD_VECTOR node");
14071 
14072   SelectionDAG &DAG = DCI.DAG;
14073   SDLoc dl(N);
14074 
14075   if (!Subtarget.hasVSX())
14076     return SDValue();
14077 
14078   // The target independent DAG combiner will leave a build_vector of
14079   // float-to-int conversions intact. We can generate MUCH better code for
14080   // a float-to-int conversion of a vector of floats.
14081   SDValue FirstInput = N->getOperand(0);
14082   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14083     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14084     if (Reduced)
14085       return Reduced;
14086   }
14087 
14088   // If we're building a vector out of consecutive loads, just load that
14089   // vector type.
14090   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14091   if (Reduced)
14092     return Reduced;
14093 
14094   // If we're building a vector out of extended elements from another vector
14095   // we have P9 vector integer extend instructions. The code assumes legal
14096   // input types (i.e. it can't handle things like v4i16) so do not run before
14097   // legalization.
14098   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14099     Reduced = combineBVOfVecSExt(N, DAG);
14100     if (Reduced)
14101       return Reduced;
14102   }
14103 
14104   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14105   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14106   // is a load from <valid narrow width> to i128.
14107   if (Subtarget.isISA3_1()) {
14108     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14109     if (BVOfZLoad)
14110       return BVOfZLoad;
14111   }
14112 
14113   if (N->getValueType(0) != MVT::v2f64)
14114     return SDValue();
14115 
  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)),
  //               ([su]int_to_fp (extractelt 1)))
14118   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14119       FirstInput.getOpcode() != ISD::UINT_TO_FP)
14120     return SDValue();
14121   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14122       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14123     return SDValue();
14124   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14125     return SDValue();
14126 
14127   SDValue Ext1 = FirstInput.getOperand(0);
14128   SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14131     return SDValue();
14132 
14133   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14134   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14135   if (!Ext1Op || !Ext2Op)
14136     return SDValue();
14137   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14138       Ext1.getOperand(0) != Ext2.getOperand(0))
14139     return SDValue();
14140 
14141   int FirstElem = Ext1Op->getZExtValue();
14142   int SecondElem = Ext2Op->getZExtValue();
14143   int SubvecIdx;
14144   if (FirstElem == 0 && SecondElem == 1)
14145     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14146   else if (FirstElem == 2 && SecondElem == 3)
14147     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14148   else
14149     return SDValue();
14150 
14151   SDValue SrcVec = Ext1.getOperand(0);
14152   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14153     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14154   return DAG.getNode(NodeType, dl, MVT::v2f64,
14155                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14156 }
14157 
14158 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14159                                               DAGCombinerInfo &DCI) const {
14160   assert((N->getOpcode() == ISD::SINT_TO_FP ||
14161           N->getOpcode() == ISD::UINT_TO_FP) &&
14162          "Need an int -> FP conversion node here");
14163 
14164   if (useSoftFloat() || !Subtarget.has64BitSupport())
14165     return SDValue();
14166 
14167   SelectionDAG &DAG = DCI.DAG;
14168   SDLoc dl(N);
14169   SDValue Op(N, 0);
14170 
14171   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14172   // from the hardware.
14173   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14174     return SDValue();
14175   if (!Op.getOperand(0).getValueType().isSimple())
14176     return SDValue();
14177   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14178       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14179     return SDValue();
14180 
14181   SDValue FirstOperand(Op.getOperand(0));
14182   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14183     (FirstOperand.getValueType() == MVT::i8 ||
14184      FirstOperand.getValueType() == MVT::i16);
14185   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14186     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14187     bool DstDouble = Op.getValueType() == MVT::f64;
14188     unsigned ConvOp = Signed ?
14189       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
14190       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14191     SDValue WidthConst =
14192       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14193                             dl, false);
14194     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14195     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14196     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14197                                          DAG.getVTList(MVT::f64, MVT::Other),
14198                                          Ops, MVT::i8, LDN->getMemOperand());
14199 
14200     // For signed conversion, we need to sign-extend the value in the VSR
14201     if (Signed) {
14202       SDValue ExtOps[] = { Ld, WidthConst };
14203       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14204       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14205     } else
14206       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14207   }
14208 
14209 
  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
14214   if (Op.getOperand(0).getValueType() == MVT::i32)
14215     return SDValue();
14216 
14217   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14218          "UINT_TO_FP is supported only with FPCVT");
14219 
14220   // If we have FCFIDS, then use it when converting to single-precision.
14221   // Otherwise, convert to double-precision and then round.
14222   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14223                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14224                                                             : PPCISD::FCFIDS)
14225                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14226                                                             : PPCISD::FCFID);
14227   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14228                   ? MVT::f32
14229                   : MVT::f64;
14230 
  // If we're converting from a float to an int and back to a float again,
14232   // then we don't need the store/load pair at all.
14233   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14234        Subtarget.hasFPCVT()) ||
14235       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14236     SDValue Src = Op.getOperand(0).getOperand(0);
14237     if (Src.getValueType() == MVT::f32) {
14238       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14239       DCI.AddToWorklist(Src.getNode());
14240     } else if (Src.getValueType() != MVT::f64) {
14241       // Make sure that we don't pick up a ppc_fp128 source value.
14242       return SDValue();
14243     }
14244 
14245     unsigned FCTOp =
14246       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14247                                                         PPCISD::FCTIDUZ;
14248 
14249     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14250     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14251 
14252     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14253       FP = DAG.getNode(ISD::FP_ROUND, dl,
14254                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14255       DCI.AddToWorklist(FP.getNode());
14256     }
14257 
14258     return FP;
14259   }
14260 
14261   return SDValue();
14262 }
14263 
14264 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14265 // builtins) into loads with swaps.
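// For example, a little endian v4i32 load that needs swaps becomes roughly
//   (bitcast v4i32 (XXSWAPD (LXVD2X chain, base)))
// so the elements end up in the expected order.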
14266 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14267                                               DAGCombinerInfo &DCI) const {
14268   SelectionDAG &DAG = DCI.DAG;
14269   SDLoc dl(N);
14270   SDValue Chain;
14271   SDValue Base;
14272   MachineMemOperand *MMO;
14273 
14274   switch (N->getOpcode()) {
14275   default:
14276     llvm_unreachable("Unexpected opcode for little endian VSX load");
14277   case ISD::LOAD: {
14278     LoadSDNode *LD = cast<LoadSDNode>(N);
14279     Chain = LD->getChain();
14280     Base = LD->getBasePtr();
14281     MMO = LD->getMemOperand();
14282     // If the MMO suggests this isn't a load of a full vector, leave
14283     // things alone.  For a built-in, we have to make the change for
14284     // correctness, so if there is a size problem that will be a bug.
14285     if (MMO->getSize() < 16)
14286       return SDValue();
14287     break;
14288   }
14289   case ISD::INTRINSIC_W_CHAIN: {
14290     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14291     Chain = Intrin->getChain();
14292     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14293     // us what we want. Get operand 2 instead.
14294     Base = Intrin->getOperand(2);
14295     MMO = Intrin->getMemOperand();
14296     break;
14297   }
14298   }
14299 
14300   MVT VecTy = N->getValueType(0).getSimpleVT();
14301 
  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes.
14304   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14305       VecTy.getScalarSizeInBits() <= 32) {
14306     return SDValue();
14307   }
14308 
14309   SDValue LoadOps[] = { Chain, Base };
14310   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14311                                          DAG.getVTList(MVT::v2f64, MVT::Other),
14312                                          LoadOps, MVT::v2f64, MMO);
14313 
14314   DCI.AddToWorklist(Load.getNode());
14315   Chain = Load.getValue(1);
14316   SDValue Swap = DAG.getNode(
14317       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14318   DCI.AddToWorklist(Swap.getNode());
14319 
14320   // Add a bitcast if the resulting load type doesn't match v2f64.
14321   if (VecTy != MVT::v2f64) {
14322     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14323     DCI.AddToWorklist(N.getNode());
14324     // Package {bitcast value, swap's chain} to match Load's shape.
14325     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14326                        N, Swap.getValue(1));
14327   }
14328 
14329   return Swap;
14330 }
14331 
14332 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14333 // builtins) into stores with swaps.
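// For example, a little endian store of a v4i32 value that needs swaps
// becomes roughly
//   (STXVD2X chain, (XXSWAPD (bitcast v2f64 %val)), base)
// mirroring the load expansion above.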
14334 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14335                                                DAGCombinerInfo &DCI) const {
14336   SelectionDAG &DAG = DCI.DAG;
14337   SDLoc dl(N);
14338   SDValue Chain;
14339   SDValue Base;
14340   unsigned SrcOpnd;
14341   MachineMemOperand *MMO;
14342 
14343   switch (N->getOpcode()) {
14344   default:
14345     llvm_unreachable("Unexpected opcode for little endian VSX store");
14346   case ISD::STORE: {
14347     StoreSDNode *ST = cast<StoreSDNode>(N);
14348     Chain = ST->getChain();
14349     Base = ST->getBasePtr();
14350     MMO = ST->getMemOperand();
14351     SrcOpnd = 1;
14352     // If the MMO suggests this isn't a store of a full vector, leave
14353     // things alone.  For a built-in, we have to make the change for
14354     // correctness, so if there is a size problem that will be a bug.
14355     if (MMO->getSize() < 16)
14356       return SDValue();
14357     break;
14358   }
14359   case ISD::INTRINSIC_VOID: {
14360     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14361     Chain = Intrin->getChain();
14362     // Intrin->getBasePtr() oddly does not get what we want.
14363     Base = Intrin->getOperand(3);
14364     MMO = Intrin->getMemOperand();
14365     SrcOpnd = 2;
14366     break;
14367   }
14368   }
14369 
14370   SDValue Src = N->getOperand(SrcOpnd);
14371   MVT VecTy = Src.getValueType().getSimpleVT();
14372 
  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
  // aligned and the type is a vector with elements up to 4 bytes.
14375   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14376       VecTy.getScalarSizeInBits() <= 32) {
14377     return SDValue();
14378   }
14379 
  // All stores are done as v2f64, with a bitcast added where needed.
14381   if (VecTy != MVT::v2f64) {
14382     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14383     DCI.AddToWorklist(Src.getNode());
14384   }
14385 
14386   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14387                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14388   DCI.AddToWorklist(Swap.getNode());
14389   Chain = Swap.getValue(1);
14390   SDValue StoreOps[] = { Chain, Swap, Base };
14391   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14392                                           DAG.getVTList(MVT::Other),
14393                                           StoreOps, VecTy, MMO);
14394   DCI.AddToWorklist(Store.getNode());
14395   return Store;
14396 }
14397 
14398 // Handle DAG combine for STORE (FP_TO_INT F).
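// For example, (store (fp_to_sint f64 %f), addr) becomes roughly an
// ST_VSR_SCAL_INT memory intrinsic node fed by FP_TO_SINT_IN_VSR, so the
// converted value can be stored directly from a vector-scalar register.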
14399 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14400                                                DAGCombinerInfo &DCI) const {
14401 
14402   SelectionDAG &DAG = DCI.DAG;
14403   SDLoc dl(N);
14404   unsigned Opcode = N->getOperand(1).getOpcode();
14405 
14406   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14407          && "Not a FP_TO_INT Instruction!");
14408 
14409   SDValue Val = N->getOperand(1).getOperand(0);
14410   EVT Op1VT = N->getOperand(1).getValueType();
14411   EVT ResVT = Val.getValueType();
14412 
14413   if (!isTypeLegal(ResVT))
14414     return SDValue();
14415 
  // Only perform the combine for conversions to i64/i32, or i16/i8 on Power9.
14417   bool ValidTypeForStoreFltAsInt =
14418         (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14419          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14420 
14421   if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14422       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14423     return SDValue();
14424 
14425   // Extend f32 values to f64
14426   if (ResVT.getScalarSizeInBits() == 32) {
14427     Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14428     DCI.AddToWorklist(Val.getNode());
14429   }
14430 
14431   // Set signed or unsigned conversion opcode.
14432   unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14433                           PPCISD::FP_TO_SINT_IN_VSR :
14434                           PPCISD::FP_TO_UINT_IN_VSR;
14435 
14436   Val = DAG.getNode(ConvOpcode,
14437                     dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14438   DCI.AddToWorklist(Val.getNode());
14439 
14440   // Set number of bytes being converted.
14441   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14442   SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14443                     DAG.getIntPtrConstant(ByteSize, dl, false),
14444                     DAG.getValueType(Op1VT) };
14445 
14446   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14447           DAG.getVTList(MVT::Other), Ops,
14448           cast<StoreSDNode>(N)->getMemoryVT(),
14449           cast<StoreSDNode>(N)->getMemOperand());
14450 
14451   DCI.AddToWorklist(Val.getNode());
14452   return Val;
14453 }
14454 
14455 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
  // Check that the source of the element keeps flipping
  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
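  // For example, with NumElts == 4 the mask <0, 5, 2, 7> alternates between
  // the two source vectors and is accepted, while <0, 1, 6, 7> is not.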
14458   bool PrevElemFromFirstVec = Mask[0] < NumElts;
14459   for (int i = 1, e = Mask.size(); i < e; i++) {
14460     if (PrevElemFromFirstVec && Mask[i] < NumElts)
14461       return false;
14462     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14463       return false;
14464     PrevElemFromFirstVec = !PrevElemFromFirstVec;
14465   }
14466   return true;
14467 }
14468 
14469 static bool isSplatBV(SDValue Op) {
14470   if (Op.getOpcode() != ISD::BUILD_VECTOR)
14471     return false;
14472   SDValue FirstOp;
14473 
14474   // Find first non-undef input.
14475   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14476     FirstOp = Op.getOperand(i);
14477     if (!FirstOp.isUndef())
14478       break;
14479   }
14480 
14481   // All inputs are undef or the same as the first non-undef input.
14482   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14483     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14484       return false;
14485   return true;
14486 }
14487 
14488 static SDValue isScalarToVec(SDValue Op) {
14489   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14490     return Op;
14491   if (Op.getOpcode() != ISD::BITCAST)
14492     return SDValue();
14493   Op = Op.getOperand(0);
14494   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14495     return Op;
14496   return SDValue();
14497 }
14498 
14499 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14500                                             int LHSMaxIdx, int RHSMinIdx,
14501                                             int RHSMaxIdx, int HalfVec) {
14502   for (int i = 0, e = ShuffV.size(); i < e; i++) {
14503     int Idx = ShuffV[i];
14504     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14505       ShuffV[i] += HalfVec;
14506   }
14507   return;
14508 }
14509 
14510 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14511 // the original is:
14512 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14513 // In such a case, just change the shuffle mask to extract the element
14514 // from the permuted index.
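// For example, (scalar_to_vector (extract_elt v4i32 %a, 3)) becomes a
// shuffle of %a that places element 3 at index 2 (VT.getVectorNumElements()
// / 2), the slot a permuted SCALAR_TO_VECTOR would use.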
14515 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14516   SDLoc dl(OrigSToV);
14517   EVT VT = OrigSToV.getValueType();
14518   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14519          "Expecting a SCALAR_TO_VECTOR here");
14520   SDValue Input = OrigSToV.getOperand(0);
14521 
14522   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14523     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14524     SDValue OrigVector = Input.getOperand(0);
14525 
14526     // Can't handle non-const element indices or different vector types
14527     // for the input to the extract and the output of the scalar_to_vector.
14528     if (Idx && VT == OrigVector.getValueType()) {
14529       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14530       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14531       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14532     }
14533   }
14534   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14535                      OrigSToV.getOperand(0));
14536 }
14537 
14538 // On little endian subtargets, combine shuffles such as:
14539 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14540 // into:
14541 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14542 // because the latter can be matched to a single instruction merge.
14543 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14544 // to put the value into element zero. Adjust the shuffle mask so that the
14545 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
14546 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14547                                                 SelectionDAG &DAG) const {
14548   SDValue LHS = SVN->getOperand(0);
14549   SDValue RHS = SVN->getOperand(1);
14550   auto Mask = SVN->getMask();
14551   int NumElts = LHS.getValueType().getVectorNumElements();
14552   SDValue Res(SVN, 0);
14553   SDLoc dl(SVN);
14554 
14555   // None of these combines are useful on big endian systems since the ISA
14556   // already has a big endian bias.
14557   if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14558     return Res;
14559 
14560   // If this is not a shuffle of a shuffle and the first element comes from
14561   // the second vector, canonicalize to the commuted form. This will make it
14562   // more likely to match one of the single instruction patterns.
14563   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14564       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14565     std::swap(LHS, RHS);
14566     Res = DAG.getCommutedVectorShuffle(*SVN);
14567     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14568   }
14569 
14570   // Adjust the shuffle mask if either input vector comes from a
14571   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14572   // form (to prevent the need for a swap).
14573   SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14574   SDValue SToVLHS = isScalarToVec(LHS);
14575   SDValue SToVRHS = isScalarToVec(RHS);
14576   if (SToVLHS || SToVRHS) {
14577     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14578                             : SToVRHS.getValueType().getVectorNumElements();
14579     int NumEltsOut = ShuffV.size();
14580 
14581     // Initially assume that neither input is permuted. These will be adjusted
14582     // accordingly if either input is.
14583     int LHSMaxIdx = -1;
14584     int RHSMinIdx = -1;
14585     int RHSMaxIdx = -1;
14586     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14587 
14588     // Get the permuted scalar to vector nodes for the source(s) that come from
14589     // ISD::SCALAR_TO_VECTOR.
14590     if (SToVLHS) {
14591       // Set up the values for the shuffle vector fixup.
14592       LHSMaxIdx = NumEltsOut / NumEltsIn;
14593       SToVLHS = getSToVPermuted(SToVLHS, DAG);
14594       if (SToVLHS.getValueType() != LHS.getValueType())
14595         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14596       LHS = SToVLHS;
14597     }
14598     if (SToVRHS) {
14599       RHSMinIdx = NumEltsOut;
14600       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14601       SToVRHS = getSToVPermuted(SToVRHS, DAG);
14602       if (SToVRHS.getValueType() != RHS.getValueType())
14603         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14604       RHS = SToVRHS;
14605     }
14606 
14607     // Fix up the shuffle mask to reflect where the desired element actually is.
14608     // The minimum and maximum indices that correspond to element zero for both
14609     // the LHS and RHS are computed and will control which shuffle mask entries
14610     // are to be changed. For example, if the RHS is permuted, any shuffle mask
14611     // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14612     // HalfVec to refer to the corresponding element in the permuted vector.
14613     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14614                                     HalfVec);
14615     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14616 
14617     // We may have simplified away the shuffle. We won't be able to do anything
14618     // further with it here.
14619     if (!isa<ShuffleVectorSDNode>(Res))
14620       return Res;
14621     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14622   }
14623 
14624   // The common case after we commuted the shuffle is that the RHS is a splat
14625   // and we have elements coming in from the splat at indices that are not
14626   // conducive to using a merge.
14627   // Example:
14628   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14629   if (!isSplatBV(RHS))
14630     return Res;
14631 
14632   // We are looking for a mask such that all even elements are from
14633   // one vector and all odd elements from the other.
14634   if (!isAlternatingShuffMask(Mask, NumElts))
14635     return Res;
14636 
14637   // Adjust the mask so we are pulling in the same index from the splat
14638   // as the index from the interesting vector in consecutive elements.
14639   // Example (even elements from first vector):
14640   // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14641   if (Mask[0] < NumElts)
14642     for (int i = 1, e = Mask.size(); i < e; i += 2)
14643       ShuffV[i] = (ShuffV[i - 1] + NumElts);
14644   // Example (odd elements from first vector):
14645   // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14646   else
14647     for (int i = 0, e = Mask.size(); i < e; i += 2)
14648       ShuffV[i] = (ShuffV[i + 1] + NumElts);
14649 
14650   // If the RHS has undefs, we need to remove them since we may have created
14651   // a shuffle that adds those instead of the splat value.
14652   SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14653   RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14654 
14655   Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14656   return Res;
14657 }
14658 
14659 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14660                                                 LSBaseSDNode *LSBase,
14661                                                 DAGCombinerInfo &DCI) const {
14662   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14663         "Not a reverse memop pattern!");
14664 
14665   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14666     auto Mask = SVN->getMask();
14667     int i = 0;
14668     auto I = Mask.rbegin();
14669     auto E = Mask.rend();
14670 
14671     for (; I != E; ++I) {
14672       if (*I != i)
14673         return false;
14674       i++;
14675     }
14676     return true;
14677   };
14678 
14679   SelectionDAG &DAG = DCI.DAG;
14680   EVT VT = SVN->getValueType(0);
14681 
14682   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14683     return SDValue();
14684 
  // Before Power9, the PPCVSXSwapRemoval pass hacks the element order
  // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts
  // with that optimization, so we don't do it before Power9.
14688   if (!Subtarget.hasP9Vector())
14689     return SDValue();
14690 
  if (!IsElementReverse(SVN))
14692     return SDValue();
14693 
14694   if (LSBase->getOpcode() == ISD::LOAD) {
14695     SDLoc dl(SVN);
14696     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14697     return DAG.getMemIntrinsicNode(
14698         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14699         LSBase->getMemoryVT(), LSBase->getMemOperand());
14700   }
14701 
14702   if (LSBase->getOpcode() == ISD::STORE) {
14703     SDLoc dl(LSBase);
14704     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14705                           LSBase->getBasePtr()};
14706     return DAG.getMemIntrinsicNode(
14707         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14708         LSBase->getMemoryVT(), LSBase->getMemOperand());
14709   }
14710 
14711   llvm_unreachable("Expected a load or store node here");
14712 }
14713 
14714 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14715                                              DAGCombinerInfo &DCI) const {
14716   SelectionDAG &DAG = DCI.DAG;
14717   SDLoc dl(N);
14718   switch (N->getOpcode()) {
14719   default: break;
14720   case ISD::ADD:
14721     return combineADD(N, DCI);
14722   case ISD::SHL:
14723     return combineSHL(N, DCI);
14724   case ISD::SRA:
14725     return combineSRA(N, DCI);
14726   case ISD::SRL:
14727     return combineSRL(N, DCI);
14728   case ISD::MUL:
14729     return combineMUL(N, DCI);
14730   case ISD::FMA:
14731   case PPCISD::FNMSUB:
14732     return combineFMALike(N, DCI);
14733   case PPCISD::SHL:
14734     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14735         return N->getOperand(0);
14736     break;
14737   case PPCISD::SRL:
14738     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14739         return N->getOperand(0);
14740     break;
14741   case PPCISD::SRA:
14742     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14743       if (C->isNullValue() ||   //  0 >>s V -> 0.
14744           C->isAllOnesValue())    // -1 >>s V -> -1.
14745         return N->getOperand(0);
14746     }
14747     break;
14748   case ISD::SIGN_EXTEND:
14749   case ISD::ZERO_EXTEND:
14750   case ISD::ANY_EXTEND:
14751     return DAGCombineExtBoolTrunc(N, DCI);
14752   case ISD::TRUNCATE:
14753     return combineTRUNCATE(N, DCI);
14754   case ISD::SETCC:
14755     if (SDValue CSCC = combineSetCC(N, DCI))
14756       return CSCC;
14757     LLVM_FALLTHROUGH;
14758   case ISD::SELECT_CC:
14759     return DAGCombineTruncBoolExt(N, DCI);
14760   case ISD::SINT_TO_FP:
14761   case ISD::UINT_TO_FP:
14762     return combineFPToIntToFP(N, DCI);
14763   case ISD::VECTOR_SHUFFLE:
14764     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14765       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14766       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14767     }
14768     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14769   case ISD::STORE: {
14770 
14771     EVT Op1VT = N->getOperand(1).getValueType();
14772     unsigned Opcode = N->getOperand(1).getOpcode();
14773 
14774     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
      SDValue Val = combineStoreFPToInt(N, DCI);
14776       if (Val)
14777         return Val;
14778     }
14779 
14780     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14781       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
      SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14783       if (Val)
14784         return Val;
14785     }
14786 
14787     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
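    // For example, (store (bswap i32 %x), addr) becomes an STBRX memory
    // intrinsic node that is later matched to stwbrx.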
14788     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14789         N->getOperand(1).getNode()->hasOneUse() &&
14790         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14791          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14792 
      // STBRX can only handle simple types and it makes no sense to store
      // fewer than two bytes in byte-reversed order.
14795       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14796       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14797         break;
14798 
14799       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14800       // Do an any-extend to 32-bits if this is a half-word input.
14801       if (BSwapOp.getValueType() == MVT::i16)
14802         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14803 
      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted right before STBRX.
14806       if (Op1VT.bitsGT(mVT)) {
14807         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14808         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14809                               DAG.getConstant(Shift, dl, MVT::i32));
14810         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14811         if (Op1VT == MVT::i64)
14812           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14813       }
14814 
14815       SDValue Ops[] = {
14816         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14817       };
14818       return
14819         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14820                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14821                                 cast<StoreSDNode>(N)->getMemOperand());
14822     }
14823 
    // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
    // This increases the chance of CSE'ing the constant construction.
14826     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14827         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
14829       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14830       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14831                                     MemVT.getSizeInBits());
14832       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14833 
14834       // DAG.getTruncStore() can't be used here because it doesn't accept
14835       // the general (base + offset) addressing mode.
14836       // So we use UpdateNodeOperands and setTruncatingStore instead.
14837       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14838                              N->getOperand(3));
14839       cast<StoreSDNode>(N)->setTruncatingStore(true);
14840       return SDValue(N, 0);
14841     }
14842 
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
14844     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14845     if (Op1VT.isSimple()) {
14846       MVT StoreVT = Op1VT.getSimpleVT();
14847       if (Subtarget.needsSwapsForVSXMemOps() &&
14848           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14849            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14850         return expandVSXStoreForLE(N, DCI);
14851     }
14852     break;
14853   }
14854   case ISD::LOAD: {
14855     LoadSDNode *LD = cast<LoadSDNode>(N);
14856     EVT VT = LD->getValueType(0);
14857 
14858     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14859     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14860     if (VT.isSimple()) {
14861       MVT LoadVT = VT.getSimpleVT();
14862       if (Subtarget.needsSwapsForVSXMemOps() &&
14863           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14864            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14865         return expandVSXLoadForLE(N, DCI);
14866     }
14867 
14868     // We sometimes end up with a 64-bit integer load, from which we extract
14869     // two single-precision floating-point numbers. This happens with
14870     // std::complex<float>, and other similar structures, because of the way we
14871     // canonicalize structure copies. However, if we lack direct moves,
14872     // then the final bitcasts from the extracted integer values to the
14873     // floating-point numbers turn into store/load pairs. Even with direct moves,
14874     // just loading the two floating-point numbers is likely better.
14875     auto ReplaceTwoFloatLoad = [&]() {
14876       if (VT != MVT::i64)
14877         return false;
14878 
14879       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14880           LD->isVolatile())
14881         return false;
14882 
14883       //  We're looking for a sequence like this:
14884       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14885       //      t16: i64 = srl t13, Constant:i32<32>
14886       //    t17: i32 = truncate t16
14887       //  t18: f32 = bitcast t17
14888       //    t19: i32 = truncate t13
14889       //  t20: f32 = bitcast t19
14890 
14891       if (!LD->hasNUsesOfValue(2, 0))
14892         return false;
14893 
14894       auto UI = LD->use_begin();
14895       while (UI.getUse().getResNo() != 0) ++UI;
14896       SDNode *Trunc = *UI++;
14897       while (UI.getUse().getResNo() != 0) ++UI;
14898       SDNode *RightShift = *UI;
14899       if (Trunc->getOpcode() != ISD::TRUNCATE)
14900         std::swap(Trunc, RightShift);
14901 
14902       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14903           Trunc->getValueType(0) != MVT::i32 ||
14904           !Trunc->hasOneUse())
14905         return false;
14906       if (RightShift->getOpcode() != ISD::SRL ||
14907           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14908           RightShift->getConstantOperandVal(1) != 32 ||
14909           !RightShift->hasOneUse())
14910         return false;
14911 
14912       SDNode *Trunc2 = *RightShift->use_begin();
14913       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14914           Trunc2->getValueType(0) != MVT::i32 ||
14915           !Trunc2->hasOneUse())
14916         return false;
14917 
14918       SDNode *Bitcast = *Trunc->use_begin();
14919       SDNode *Bitcast2 = *Trunc2->use_begin();
14920 
14921       if (Bitcast->getOpcode() != ISD::BITCAST ||
14922           Bitcast->getValueType(0) != MVT::f32)
14923         return false;
14924       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14925           Bitcast2->getValueType(0) != MVT::f32)
14926         return false;
14927 
14928       if (Subtarget.isLittleEndian())
14929         std::swap(Bitcast, Bitcast2);
14930 
14931       // Bitcast has the second float (in memory-layout order) and Bitcast2
14932       // has the first one.
14933 
14934       SDValue BasePtr = LD->getBasePtr();
14935       if (LD->isIndexed()) {
14936         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14937                "Non-pre-inc AM on PPC?");
14938         BasePtr =
14939           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14940                       LD->getOffset());
14941       }
14942 
14943       auto MMOFlags =
14944           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14945       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14946                                       LD->getPointerInfo(), LD->getAlignment(),
14947                                       MMOFlags, LD->getAAInfo());
14948       SDValue AddPtr =
14949         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14950                     BasePtr, DAG.getIntPtrConstant(4, dl));
14951       SDValue FloatLoad2 = DAG.getLoad(
14952           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14953           LD->getPointerInfo().getWithOffset(4),
14954           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14955 
14956       if (LD->isIndexed()) {
14957         // Note that DAGCombine should re-form any pre-increment load(s) from
14958         // what is produced here if that makes sense.
14959         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14960       }
14961 
14962       DCI.CombineTo(Bitcast2, FloatLoad);
14963       DCI.CombineTo(Bitcast, FloatLoad2);
14964 
14965       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14966                                     SDValue(FloatLoad2.getNode(), 1));
14967       return true;
14968     };
14969 
14970     if (ReplaceTwoFloatLoad())
14971       return SDValue(N, 0);
14972 
14973     EVT MemVT = LD->getMemoryVT();
14974     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14975     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14976     if (LD->isUnindexed() && VT.isVector() &&
14977         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14978           // P8 and later hardware should just use LOAD.
14979           !Subtarget.hasP8Vector() &&
14980           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14981            VT == MVT::v4f32))) &&
14982         LD->getAlign() < ABIAlignment) {
14983       // This is a type-legal unaligned Altivec load.
14984       SDValue Chain = LD->getChain();
14985       SDValue Ptr = LD->getBasePtr();
14986       bool isLittleEndian = Subtarget.isLittleEndian();
14987 
14988       // This implements the loading of unaligned vectors as described in
14989       // the venerable Apple Velocity Engine overview. Specifically:
14990       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14991       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14992       //
14993       // The general idea is to expand a sequence of one or more unaligned
14994       // loads into an alignment-based permutation-control instruction (lvsl
14995       // or lvsr), a series of regular vector loads (which always truncate
14996       // their input address to an aligned address), and a series of
14997       // permutations.  The results of these permutations are the requested
14998       // loaded values.  The trick is that the last "extra" load is not taken
14999       // from the address you might suspect (sizeof(vector) bytes after the
15000       // last requested load), but rather sizeof(vector) - 1 bytes after the
15001       // last requested vector. The point of this is to avoid a page fault if
15002       // the base address happened to be aligned. This works because if the
15003       // base address is aligned, then adding less than a full vector length
15004       // will cause the last vector in the sequence to be (re)loaded.
15005       // Otherwise, the extra load fetches the next (aligned) vector, which is
15006       // what is actually needed.
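      // For example, on a big-endian target the expansion is conceptually:
      //   permute = lvsl(Ptr)
      //   lo      = lvx(Ptr)                       // covers the start of the data
      //   hi      = lvx(Ptr + sizeof(vector) - 1)  // covers the end of the data
      //   result  = vperm(lo, hi, permute)
      // (the exact increment used for the second load is chosen below).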
15007 
15008       // We might be able to reuse the permutation generation from
15009       // a different base address offset from this one by an aligned amount.
15010       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15011       // optimization later.
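      // The loads themselves are emitted as v4i32 lvx intrinsics; the result is
      // bitcast back to the requested vector type after the final vperm below.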
15012       Intrinsic::ID Intr, IntrLD, IntrPerm;
15013       MVT PermCntlTy, PermTy, LDTy;
15014       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15015                             : Intrinsic::ppc_altivec_lvsl;
15016       IntrLD = Intrinsic::ppc_altivec_lvx;
15017       IntrPerm = Intrinsic::ppc_altivec_vperm;
15018       PermCntlTy = MVT::v16i8;
15019       PermTy = MVT::v4i32;
15020       LDTy = MVT::v4i32;
15021 
15022       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15023 
15024       // Create the new MMO for the new base load. It is like the original MMO,
15025       // but represents an area in memory almost twice the vector size centered
15026       // on the original address. If the address is unaligned, we might start
15027       // reading up to (sizeof(vector)-1) bytes below the address of the
15028       // original unaligned load.
15029       MachineFunction &MF = DAG.getMachineFunction();
15030       MachineMemOperand *BaseMMO =
15031         MF.getMachineMemOperand(LD->getMemOperand(),
15032                                 -(long)MemVT.getStoreSize()+1,
15033                                 2*MemVT.getStoreSize()-1);
15034 
15035       // Create the new base load.
15036       SDValue LDXIntID =
15037           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15038       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15039       SDValue BaseLoad =
15040         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15041                                 DAG.getVTList(PermTy, MVT::Other),
15042                                 BaseLoadOps, LDTy, BaseMMO);
15043 
15044       // Note that the value of IncOffset (which is provided to the next
15045       // load's pointer info offset value, and thus used to calculate the
15046       // alignment), and the value of IncValue (which is actually used to
15047       // increment the pointer value) may differ! This is because we
15048       // require the next load to appear to be aligned, even though it
15049       // is actually offset from the base pointer by a lesser amount.
15050       int IncOffset = VT.getSizeInBits() / 8;
15051       int IncValue = IncOffset;
15052 
15053       // Walk (both up and down) the chain looking for another load at the real
15054       // (aligned) offset (the alignment of the other load does not matter in
15055       // this case). If found, then do not use the offset reduction trick, as
15056       // that will prevent the loads from being later combined (as they would
15057       // otherwise be duplicates).
15058       if (!findConsecutiveLoad(LD, DAG))
15059         --IncValue;
15060 
15061       SDValue Increment =
15062           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15063       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15064 
15065       MachineMemOperand *ExtraMMO =
15066         MF.getMachineMemOperand(LD->getMemOperand(),
15067                                 1, 2*MemVT.getStoreSize()-1);
15068       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15069       SDValue ExtraLoad =
15070         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15071                                 DAG.getVTList(PermTy, MVT::Other),
15072                                 ExtraLoadOps, LDTy, ExtraMMO);
15073 
15074       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15075         BaseLoad.getValue(1), ExtraLoad.getValue(1));
15076 
15077       // Because vperm has a big-endian bias, we must reverse the order
15078       // of the input vectors and complement the permute control vector
15079       // when generating little endian code.  We have already handled the
15080       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15081       // and ExtraLoad here.
15082       SDValue Perm;
15083       if (isLittleEndian)
15084         Perm = BuildIntrinsicOp(IntrPerm,
15085                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15086       else
15087         Perm = BuildIntrinsicOp(IntrPerm,
15088                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15089 
15090       if (VT != PermTy)
15091         Perm = Subtarget.hasAltivec()
15092                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15093                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15094                                  DAG.getTargetConstant(1, dl, MVT::i64));
15095                                // second argument is 1 because this rounding
15096                                // is always exact.
15097 
15098       // The output of the permutation is our loaded result, the TokenFactor is
15099       // our new chain.
15100       DCI.CombineTo(N, Perm, TF);
15101       return SDValue(N, 0);
15102     }
15103     }
15104     break;
15105     case ISD::INTRINSIC_WO_CHAIN: {
15106       bool isLittleEndian = Subtarget.isLittleEndian();
15107       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15108       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15109                                            : Intrinsic::ppc_altivec_lvsl);
15110       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15111         SDValue Add = N->getOperand(1);
15112 
15113         int Bits = 4 /* 16 byte alignment */;
15114 
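        // lvsl/lvsr depend only on the low 4 bits of the address, so adding an
        // offset that is a multiple of 16 leaves the permute control unchanged.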
15115         if (DAG.MaskedValueIsZero(Add->getOperand(1),
15116                                   APInt::getAllOnesValue(Bits /* alignment */)
15117                                       .zext(Add.getScalarValueSizeInBits()))) {
15118           SDNode *BasePtr = Add->getOperand(0).getNode();
15119           for (SDNode::use_iterator UI = BasePtr->use_begin(),
15120                                     UE = BasePtr->use_end();
15121                UI != UE; ++UI) {
15122             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15123                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15124                     IID) {
15125               // We've found another LVSL/LVSR, and this address is an aligned
15126               // multiple of that one. The results will be the same, so use the
15127               // one we've just found instead.
15128 
15129               return SDValue(*UI, 0);
15130             }
15131           }
15132         }
15133 
15134         if (isa<ConstantSDNode>(Add->getOperand(1))) {
15135           SDNode *BasePtr = Add->getOperand(0).getNode();
15136           for (SDNode::use_iterator UI = BasePtr->use_begin(),
15137                UE = BasePtr->use_end(); UI != UE; ++UI) {
15138             if (UI->getOpcode() == ISD::ADD &&
15139                 isa<ConstantSDNode>(UI->getOperand(1)) &&
15140                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15141                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15142                 (1ULL << Bits) == 0) {
15143               SDNode *OtherAdd = *UI;
15144               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15145                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
15146                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15147                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15148                   return SDValue(*VI, 0);
15149                 }
15150               }
15151             }
15152           }
15153         }
15154       }
15155 
15156       // Combine vmaxsw/h/b(a, a's negation) to abs(a).
15157       // This exposes the vabsduw/h/b opportunity for downstream combines.
15158       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15159           (IID == Intrinsic::ppc_altivec_vmaxsw ||
15160            IID == Intrinsic::ppc_altivec_vmaxsh ||
15161            IID == Intrinsic::ppc_altivec_vmaxsb)) {
15162         SDValue V1 = N->getOperand(1);
15163         SDValue V2 = N->getOperand(2);
15164         if ((V1.getSimpleValueType() == MVT::v4i32 ||
15165              V1.getSimpleValueType() == MVT::v8i16 ||
15166              V1.getSimpleValueType() == MVT::v16i8) &&
15167             V1.getSimpleValueType() == V2.getSimpleValueType()) {
15168           // (0-a, a)
15169           if (V1.getOpcode() == ISD::SUB &&
15170               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15171               V1.getOperand(1) == V2) {
15172             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15173           }
15174           // (a, 0-a)
15175           if (V2.getOpcode() == ISD::SUB &&
15176               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15177               V2.getOperand(1) == V1) {
15178             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15179           }
15180           // (x-y, y-x)
15181           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15182               V1.getOperand(0) == V2.getOperand(1) &&
15183               V1.getOperand(1) == V2.getOperand(0)) {
15184             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15185           }
15186         }
15187       }
15188     }
15189 
15190     break;
15191   case ISD::INTRINSIC_W_CHAIN:
15192     // For little endian, VSX loads require generating lxvd2x/xxswapd.
15193     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15194     if (Subtarget.needsSwapsForVSXMemOps()) {
15195       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15196       default:
15197         break;
15198       case Intrinsic::ppc_vsx_lxvw4x:
15199       case Intrinsic::ppc_vsx_lxvd2x:
15200         return expandVSXLoadForLE(N, DCI);
15201       }
15202     }
15203     break;
15204   case ISD::INTRINSIC_VOID:
15205     // For little endian, VSX stores require generating xxswapd/stxvd2x.
15206     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15207     if (Subtarget.needsSwapsForVSXMemOps()) {
15208       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15209       default:
15210         break;
15211       case Intrinsic::ppc_vsx_stxvw4x:
15212       case Intrinsic::ppc_vsx_stxvd2x:
15213         return expandVSXStoreForLE(N, DCI);
15214       }
15215     }
15216     break;
15217   case ISD::BSWAP:
15218     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15219     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15220         N->getOperand(0).hasOneUse() &&
15221         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15222          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15223           N->getValueType(0) == MVT::i64))) {
15224       SDValue Load = N->getOperand(0);
15225       LoadSDNode *LD = cast<LoadSDNode>(Load);
15226       // Create the byte-swapping load.
15227       SDValue Ops[] = {
15228         LD->getChain(),    // Chain
15229         LD->getBasePtr(),  // Ptr
15230         DAG.getValueType(N->getValueType(0)) // VT
15231       };
15232       SDValue BSLoad =
15233         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15234                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15235                                               MVT::i64 : MVT::i32, MVT::Other),
15236                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
15237 
15238       // If this is an i16 load, insert the truncate.
15239       SDValue ResVal = BSLoad;
15240       if (N->getValueType(0) == MVT::i16)
15241         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15242 
15243       // First, combine the bswap away.  This makes the value produced by the
15244       // load dead.
15245       DCI.CombineTo(N, ResVal);
15246 
15247       // Next, combine the load away, we give it a bogus result value but a real
15248       // chain result.  The result value is dead because the bswap is dead.
15249       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15250 
15251       // Return N so it doesn't get rechecked!
15252       return SDValue(N, 0);
15253     }
15254     break;
15255   case PPCISD::VCMP:
15256     // If a VCMP_rec node already exists with exactly the same operands as this
15257     // node, use its result instead of this node (VCMP_rec computes both a CR6
15258     // and a normal output).
15259     //
15260     if (!N->getOperand(0).hasOneUse() &&
15261         !N->getOperand(1).hasOneUse() &&
15262         !N->getOperand(2).hasOneUse()) {
15263 
15264       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15265       SDNode *VCMPrecNode = nullptr;
15266 
15267       SDNode *LHSN = N->getOperand(0).getNode();
15268       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15269            UI != E; ++UI)
15270         if (UI->getOpcode() == PPCISD::VCMP_rec &&
15271             UI->getOperand(1) == N->getOperand(1) &&
15272             UI->getOperand(2) == N->getOperand(2) &&
15273             UI->getOperand(0) == N->getOperand(0)) {
15274           VCMPrecNode = *UI;
15275           break;
15276         }
15277 
15278       // If there is no VCMP_rec node, or if the flag value has a single use,
15279       // don't transform this.
15280       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15281         break;
15282 
15283       // Look at the (necessarily single) use of the flag value.  If it has a
15284       // chain, this transformation is more complex.  Note that multiple things
15285       // could use the value result, which we should ignore.
15286       SDNode *FlagUser = nullptr;
15287       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15288            FlagUser == nullptr; ++UI) {
15289         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15290         SDNode *User = *UI;
15291         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15292           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15293             FlagUser = User;
15294             break;
15295           }
15296         }
15297       }
15298 
15299       // If the user is a MFOCRF instruction, we know this is safe.
15300       // Otherwise we give up for right now.
15301       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15302         return SDValue(VCMPrecNode, 0);
15303     }
15304     break;
15305   case ISD::BRCOND: {
15306     SDValue Cond = N->getOperand(1);
15307     SDValue Target = N->getOperand(2);
15308 
15309     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15310         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15311           Intrinsic::loop_decrement) {
15312 
15313       // We now need to make the intrinsic dead (it cannot be instruction
15314       // selected).
15315       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15316       assert(Cond.getNode()->hasOneUse() &&
15317              "Counter decrement has more than one use");
15318 
15319       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15320                          N->getOperand(0), Target);
15321     }
15322   }
15323   break;
15324   case ISD::BR_CC: {
15325     // If this is a branch on an altivec predicate comparison, lower this so
15326     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
15327     // lowering is done pre-legalize, because the legalizer lowers the predicate
15328     // compare down to code that is difficult to reassemble.
15329     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15330     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15331 
15332     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15333     // value. If so, pass-through the AND to get to the intrinsic.
15334     if (LHS.getOpcode() == ISD::AND &&
15335         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15336         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15337           Intrinsic::loop_decrement &&
15338         isa<ConstantSDNode>(LHS.getOperand(1)) &&
15339         !isNullConstant(LHS.getOperand(1)))
15340       LHS = LHS.getOperand(0);
15341 
15342     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15343         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15344           Intrinsic::loop_decrement &&
15345         isa<ConstantSDNode>(RHS)) {
15346       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15347              "Counter decrement comparison is not EQ or NE");
15348 
15349       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15350       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15351                     (CC == ISD::SETNE && !Val);
15352 
15353       // We now need to make the intrinsic dead (it cannot be instruction
15354       // selected).
15355       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15356       assert(LHS.getNode()->hasOneUse() &&
15357              "Counter decrement has more than one use");
15358 
15359       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15360                          N->getOperand(0), N->getOperand(4));
15361     }
15362 
15363     int CompareOpc;
15364     bool isDot;
15365 
15366     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15367         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15368         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15369       assert(isDot && "Can't compare against a vector result!");
15370 
15371       // If this is a comparison against something other than 0/1, then we know
15372       // that the condition is never/always true.
15373       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15374       if (Val != 0 && Val != 1) {
15375         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
15376           return N->getOperand(0);
15377         // Always !=, turn it into an unconditional branch.
15378         return DAG.getNode(ISD::BR, dl, MVT::Other,
15379                            N->getOperand(0), N->getOperand(4));
15380       }
15381 
15382       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15383 
15384       // Create the PPCISD altivec 'dot' comparison node.
15385       SDValue Ops[] = {
15386         LHS.getOperand(2),  // LHS of compare
15387         LHS.getOperand(3),  // RHS of compare
15388         DAG.getConstant(CompareOpc, dl, MVT::i32)
15389       };
15390       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15391       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15392 
15393       // Unpack the result based on how the target uses it.
15394       PPC::Predicate CompOpc;
15395       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15396       default:  // Can't happen, don't crash on invalid number though.
15397       case 0:   // Branch on the value of the EQ bit of CR6.
15398         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15399         break;
15400       case 1:   // Branch on the inverted value of the EQ bit of CR6.
15401         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15402         break;
15403       case 2:   // Branch on the value of the LT bit of CR6.
15404         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15405         break;
15406       case 3:   // Branch on the inverted value of the LT bit of CR6.
15407         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15408         break;
15409       }
15410 
15411       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15412                          DAG.getConstant(CompOpc, dl, MVT::i32),
15413                          DAG.getRegister(PPC::CR6, MVT::i32),
15414                          N->getOperand(4), CompNode.getValue(1));
15415     }
15416     break;
15417   }
15418   case ISD::BUILD_VECTOR:
15419     return DAGCombineBuildVector(N, DCI);
15420   case ISD::ABS:
15421     return combineABS(N, DCI);
15422   case ISD::VSELECT:
15423     return combineVSelect(N, DCI);
15424   }
15425 
15426   return SDValue();
15427 }
15428 
15429 SDValue
15430 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15431                                  SelectionDAG &DAG,
15432                                  SmallVectorImpl<SDNode *> &Created) const {
15433   // fold (sdiv X, pow2)
15434   EVT VT = N->getValueType(0);
15435   if (VT == MVT::i64 && !Subtarget.isPPC64())
15436     return SDValue();
15437   if ((VT != MVT::i32 && VT != MVT::i64) ||
15438       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15439     return SDValue();
15440 
15441   SDLoc DL(N);
15442   SDValue N0 = N->getOperand(0);
15443 
15444   bool IsNegPow2 = (-Divisor).isPowerOf2();
15445   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15446   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15447 
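  // On PPC, signed division by a power of two is an arithmetic shift right
  // followed by addze: the shift sets the carry when the dividend is negative
  // and nonzero bits are shifted out, and adding the carry back rounds the
  // quotient toward zero. PPCISD::SRA_ADDZE models that pair.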
15448   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15449   Created.push_back(Op.getNode());
15450 
15451   if (IsNegPow2) {
15452     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15453     Created.push_back(Op.getNode());
15454   }
15455 
15456   return Op;
15457 }
15458 
15459 //===----------------------------------------------------------------------===//
15460 // Inline Assembly Support
15461 //===----------------------------------------------------------------------===//
15462 
15463 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15464                                                       KnownBits &Known,
15465                                                       const APInt &DemandedElts,
15466                                                       const SelectionDAG &DAG,
15467                                                       unsigned Depth) const {
15468   Known.resetAll();
15469   switch (Op.getOpcode()) {
15470   default: break;
15471   case PPCISD::LBRX: {
15472     // lhbrx is known to have the top bits cleared out.
15473     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15474       Known.Zero = 0xFFFF0000;
15475     break;
15476   }
15477   case ISD::INTRINSIC_WO_CHAIN: {
15478     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15479     default: break;
15480     case Intrinsic::ppc_altivec_vcmpbfp_p:
15481     case Intrinsic::ppc_altivec_vcmpeqfp_p:
15482     case Intrinsic::ppc_altivec_vcmpequb_p:
15483     case Intrinsic::ppc_altivec_vcmpequh_p:
15484     case Intrinsic::ppc_altivec_vcmpequw_p:
15485     case Intrinsic::ppc_altivec_vcmpequd_p:
15486     case Intrinsic::ppc_altivec_vcmpequq_p:
15487     case Intrinsic::ppc_altivec_vcmpgefp_p:
15488     case Intrinsic::ppc_altivec_vcmpgtfp_p:
15489     case Intrinsic::ppc_altivec_vcmpgtsb_p:
15490     case Intrinsic::ppc_altivec_vcmpgtsh_p:
15491     case Intrinsic::ppc_altivec_vcmpgtsw_p:
15492     case Intrinsic::ppc_altivec_vcmpgtsd_p:
15493     case Intrinsic::ppc_altivec_vcmpgtsq_p:
15494     case Intrinsic::ppc_altivec_vcmpgtub_p:
15495     case Intrinsic::ppc_altivec_vcmpgtuh_p:
15496     case Intrinsic::ppc_altivec_vcmpgtuw_p:
15497     case Intrinsic::ppc_altivec_vcmpgtud_p:
15498     case Intrinsic::ppc_altivec_vcmpgtuq_p:
15499       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
15500       break;
15501     }
15502   }
15503   }
15504 }
15505 
15506 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15507   switch (Subtarget.getCPUDirective()) {
15508   default: break;
15509   case PPC::DIR_970:
15510   case PPC::DIR_PWR4:
15511   case PPC::DIR_PWR5:
15512   case PPC::DIR_PWR5X:
15513   case PPC::DIR_PWR6:
15514   case PPC::DIR_PWR6X:
15515   case PPC::DIR_PWR7:
15516   case PPC::DIR_PWR8:
15517   case PPC::DIR_PWR9:
15518   case PPC::DIR_PWR10:
15519   case PPC::DIR_PWR_FUTURE: {
15520     if (!ML)
15521       break;
15522 
15523     if (!DisableInnermostLoopAlign32) {
15524       // If the nested loop is an innermost loop, prefer a 32-byte alignment,
15525       // so that we can decrease cache misses and branch-prediction misses.
15526       // Actual alignment of the loop will depend on the hotness check and other
15527       // logic in alignBlocks.
15528       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15529         return Align(32);
15530     }
15531 
15532     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15533 
15534     // For small loops (between 5 and 8 instructions), align to a 32-byte
15535     // boundary so that the entire loop fits in one instruction-cache line.
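    // (With 4-byte instructions, 5 to 8 instructions is 20 to 32 bytes, which
    // matches the 16 < LoopSize <= 32 check below.)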
15536     uint64_t LoopSize = 0;
15537     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15538       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15539         LoopSize += TII->getInstSizeInBytes(*J);
15540         if (LoopSize > 32)
15541           break;
15542       }
15543 
15544     if (LoopSize > 16 && LoopSize <= 32)
15545       return Align(32);
15546 
15547     break;
15548   }
15549   }
15550 
15551   return TargetLowering::getPrefLoopAlignment(ML);
15552 }
15553 
15554 /// getConstraintType - Given a constraint, return the type of
15555 /// constraint it is for this target.
15556 PPCTargetLowering::ConstraintType
15557 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15558   if (Constraint.size() == 1) {
15559     switch (Constraint[0]) {
15560     default: break;
15561     case 'b':
15562     case 'r':
15563     case 'f':
15564     case 'd':
15565     case 'v':
15566     case 'y':
15567       return C_RegisterClass;
15568     case 'Z':
15569       // FIXME: While Z does indicate a memory constraint, it specifically
15570       // indicates an r+r address (used in conjunction with the 'y' modifier
15571       // in the replacement string). Currently, we're forcing the base
15572       // register to be r0 in the asm printer (which is interpreted as zero)
15573       // and forming the complete address in the second register. This is
15574       // suboptimal.
15575       return C_Memory;
15576     }
15577   } else if (Constraint == "wc") { // individual CR bits.
15578     return C_RegisterClass;
15579   } else if (Constraint == "wa" || Constraint == "wd" ||
15580              Constraint == "wf" || Constraint == "ws" ||
15581              Constraint == "wi" || Constraint == "ww") {
15582     return C_RegisterClass; // VSX registers.
15583   }
15584   return TargetLowering::getConstraintType(Constraint);
15585 }
15586 
15587 /// Examine constraint type and operand type and determine a weight value.
15588 /// This object must already have been set up with the operand type
15589 /// and the current alternative constraint selected.
15590 TargetLowering::ConstraintWeight
15591 PPCTargetLowering::getSingleConstraintMatchWeight(
15592     AsmOperandInfo &info, const char *constraint) const {
15593   ConstraintWeight weight = CW_Invalid;
15594   Value *CallOperandVal = info.CallOperandVal;
15595   // If we don't have a value, we can't do a match,
15596   // but allow it at the lowest weight.
15597   if (!CallOperandVal)
15598     return CW_Default;
15599   Type *type = CallOperandVal->getType();
15600 
15601   // Look at the constraint type.
15602   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15603     return CW_Register; // an individual CR bit.
15604   else if ((StringRef(constraint) == "wa" ||
15605             StringRef(constraint) == "wd" ||
15606             StringRef(constraint) == "wf") &&
15607            type->isVectorTy())
15608     return CW_Register;
15609   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15610     return CW_Register; // just holds 64-bit integer data.
15611   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15612     return CW_Register;
15613   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15614     return CW_Register;
15615 
15616   switch (*constraint) {
15617   default:
15618     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15619     break;
15620   case 'b':
15621     if (type->isIntegerTy())
15622       weight = CW_Register;
15623     break;
15624   case 'f':
15625     if (type->isFloatTy())
15626       weight = CW_Register;
15627     break;
15628   case 'd':
15629     if (type->isDoubleTy())
15630       weight = CW_Register;
15631     break;
15632   case 'v':
15633     if (type->isVectorTy())
15634       weight = CW_Register;
15635     break;
15636   case 'y':
15637     weight = CW_Register;
15638     break;
15639   case 'Z':
15640     weight = CW_Memory;
15641     break;
15642   }
15643   return weight;
15644 }
15645 
15646 std::pair<unsigned, const TargetRegisterClass *>
15647 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15648                                                 StringRef Constraint,
15649                                                 MVT VT) const {
15650   if (Constraint.size() == 1) {
15651     // GCC RS6000 Constraint Letters
15652     switch (Constraint[0]) {
15653     case 'b':   // R1-R31
15654       if (VT == MVT::i64 && Subtarget.isPPC64())
15655         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15656       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15657     case 'r':   // R0-R31
15658       if (VT == MVT::i64 && Subtarget.isPPC64())
15659         return std::make_pair(0U, &PPC::G8RCRegClass);
15660       return std::make_pair(0U, &PPC::GPRCRegClass);
15661     // 'd' and 'f' constraints are both defined to be "the floating point
15662     // registers", where one is for 32-bit and the other for 64-bit. We don't
15663     // really care about the distinction here, so give them the same reg classes.
15664     case 'd':
15665     case 'f':
15666       if (Subtarget.hasSPE()) {
15667         if (VT == MVT::f32 || VT == MVT::i32)
15668           return std::make_pair(0U, &PPC::GPRCRegClass);
15669         if (VT == MVT::f64 || VT == MVT::i64)
15670           return std::make_pair(0U, &PPC::SPERCRegClass);
15671       } else {
15672         if (VT == MVT::f32 || VT == MVT::i32)
15673           return std::make_pair(0U, &PPC::F4RCRegClass);
15674         if (VT == MVT::f64 || VT == MVT::i64)
15675           return std::make_pair(0U, &PPC::F8RCRegClass);
15676       }
15677       break;
15678     case 'v':
15679       if (Subtarget.hasAltivec())
15680         return std::make_pair(0U, &PPC::VRRCRegClass);
15681       break;
15682     case 'y':   // crrc
15683       return std::make_pair(0U, &PPC::CRRCRegClass);
15684     }
15685   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15686     // An individual CR bit.
15687     return std::make_pair(0U, &PPC::CRBITRCRegClass);
15688   } else if ((Constraint == "wa" || Constraint == "wd" ||
15689              Constraint == "wf" || Constraint == "wi") &&
15690              Subtarget.hasVSX()) {
15691     return std::make_pair(0U, &PPC::VSRCRegClass);
15692   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15693     if (VT == MVT::f32 && Subtarget.hasP8Vector())
15694       return std::make_pair(0U, &PPC::VSSRCRegClass);
15695     else
15696       return std::make_pair(0U, &PPC::VSFRCRegClass);
15697   }
15698 
15699   // If we name a VSX register, we can't defer to the base class because it
15700   // will not recognize the correct register (their names will be VSL{0-31}
15701   // and V{0-31} so they won't match). So we match them here.
15702   if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15703     int VSNum = atoi(Constraint.data() + 3);
15704     assert(VSNum >= 0 && VSNum <= 63 &&
15705            "Attempted to access a vsr out of range");
15706     if (VSNum < 32)
15707       return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15708     return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15709   }
15710   std::pair<unsigned, const TargetRegisterClass *> R =
15711       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15712 
15713   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15714   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15715   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15716   // register.
15717   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15718   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15719   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15720       PPC::GPRCRegClass.contains(R.first))
15721     return std::make_pair(TRI->getMatchingSuperReg(R.first,
15722                             PPC::sub_32, &PPC::G8RCRegClass),
15723                           &PPC::G8RCRegClass);
15724 
15725   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15726   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15727     R.first = PPC::CR0;
15728     R.second = &PPC::CRRCRegClass;
15729   }
15730 
15731   return R;
15732 }
15733 
15734 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15735 /// vector.  If it is invalid, don't add anything to Ops.
15736 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15737                                                      std::string &Constraint,
15738                                                      std::vector<SDValue>&Ops,
15739                                                      SelectionDAG &DAG) const {
15740   SDValue Result;
15741 
15742   // Only support length 1 constraints.
15743   if (Constraint.length() > 1) return;
15744 
15745   char Letter = Constraint[0];
15746   switch (Letter) {
15747   default: break;
15748   case 'I':
15749   case 'J':
15750   case 'K':
15751   case 'L':
15752   case 'M':
15753   case 'N':
15754   case 'O':
15755   case 'P': {
15756     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15757     if (!CST) return; // Must be an immediate to match.
15758     SDLoc dl(Op);
15759     int64_t Value = CST->getSExtValue();
15760     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15761                          // numbers are printed as such.
15762     switch (Letter) {
15763     default: llvm_unreachable("Unknown constraint letter!");
15764     case 'I':  // "I" is a signed 16-bit constant.
15765       if (isInt<16>(Value))
15766         Result = DAG.getTargetConstant(Value, dl, TCVT);
15767       break;
15768     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15769       if (isShiftedUInt<16, 16>(Value))
15770         Result = DAG.getTargetConstant(Value, dl, TCVT);
15771       break;
15772     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15773       if (isShiftedInt<16, 16>(Value))
15774         Result = DAG.getTargetConstant(Value, dl, TCVT);
15775       break;
15776     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15777       if (isUInt<16>(Value))
15778         Result = DAG.getTargetConstant(Value, dl, TCVT);
15779       break;
15780     case 'M':  // "M" is a constant that is greater than 31.
15781       if (Value > 31)
15782         Result = DAG.getTargetConstant(Value, dl, TCVT);
15783       break;
15784     case 'N':  // "N" is a positive constant that is an exact power of two.
15785       if (Value > 0 && isPowerOf2_64(Value))
15786         Result = DAG.getTargetConstant(Value, dl, TCVT);
15787       break;
15788     case 'O':  // "O" is the constant zero.
15789       if (Value == 0)
15790         Result = DAG.getTargetConstant(Value, dl, TCVT);
15791       break;
15792     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15793       if (isInt<16>(-Value))
15794         Result = DAG.getTargetConstant(Value, dl, TCVT);
15795       break;
15796     }
15797     break;
15798   }
15799   }
15800 
15801   if (Result.getNode()) {
15802     Ops.push_back(Result);
15803     return;
15804   }
15805 
15806   // Handle standard constraint letters.
15807   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15808 }
15809 
15810 // isLegalAddressingMode - Return true if the addressing mode represented
15811 // by AM is legal for this target, for a load/store of the specified type.
15812 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15813                                               const AddrMode &AM, Type *Ty,
15814                                               unsigned AS,
15815                                               Instruction *I) const {
15816   // The vector-type r+i form has been supported since Power9 as the DQ form. We
15817   // don't check that the offset satisfies the DQ-form requirement (off % 16 == 0)
15818   // because, on PowerPC, the imm form is preferred and the offset can be adjusted
15819   // to use it later, in the PPCLoopInstrFormPrep pass. Also, LSR checks the legal
15820   // addressing mode for an LSRUse using only its min and max offsets, so we
15821   // should be a little aggressive and accept other offsets for that LSRUse.
15822   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15823     return false;
15824 
15825   // PPC allows a sign-extended 16-bit immediate field.
15826   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15827     return false;
15828 
15829   // No global is ever allowed as a base.
15830   if (AM.BaseGV)
15831     return false;
15832 
15833   // PPC only supports r+r,
15834   switch (AM.Scale) {
15835   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15836     break;
15837   case 1:
15838     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15839       return false;
15840     // Otherwise we have r+r or r+i.
15841     break;
15842   case 2:
15843     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15844       return false;
15845     // Allow 2*r as r+r.
15846     break;
15847   default:
15848     // No other scales are supported.
15849     return false;
15850   }
15851 
15852   return true;
15853 }
15854 
15855 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15856                                            SelectionDAG &DAG) const {
15857   MachineFunction &MF = DAG.getMachineFunction();
15858   MachineFrameInfo &MFI = MF.getFrameInfo();
15859   MFI.setReturnAddressIsTaken(true);
15860 
15861   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15862     return SDValue();
15863 
15864   SDLoc dl(Op);
15865   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15866 
15867   // Make sure the function does not optimize away the store of the RA to
15868   // the stack.
15869   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15870   FuncInfo->setLRStoreRequired();
15871   bool isPPC64 = Subtarget.isPPC64();
15872   auto PtrVT = getPointerTy(MF.getDataLayout());
15873 
15874   if (Depth > 0) {
15875     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15876     SDValue Offset =
15877         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15878                         isPPC64 ? MVT::i64 : MVT::i32);
15879     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15880                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15881                        MachinePointerInfo());
15882   }
15883 
15884   // Just load the return address off the stack.
15885   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15886   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15887                      MachinePointerInfo());
15888 }
15889 
15890 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15891                                           SelectionDAG &DAG) const {
15892   SDLoc dl(Op);
15893   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15894 
15895   MachineFunction &MF = DAG.getMachineFunction();
15896   MachineFrameInfo &MFI = MF.getFrameInfo();
15897   MFI.setFrameAddressIsTaken(true);
15898 
15899   EVT PtrVT = getPointerTy(MF.getDataLayout());
15900   bool isPPC64 = PtrVT == MVT::i64;
15901 
15902   // Naked functions never have a frame pointer, and so we use r1. For all
15903   // other functions, this decision must be delayed until PEI.
15904   unsigned FrameReg;
15905   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15906     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15907   else
15908     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15909 
15910   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15911                                          PtrVT);
15912   while (Depth--)
15913     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15914                             FrameAddr, MachinePointerInfo());
15915   return FrameAddr;
15916 }
15917 
15918 // FIXME? Maybe this could be a TableGen attribute on some registers and
15919 // this table could be generated automatically from RegInfo.
15920 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15921                                               const MachineFunction &MF) const {
15922   bool isPPC64 = Subtarget.isPPC64();
15923 
15924   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15925   if (!is64Bit && VT != LLT::scalar(32))
15926     report_fatal_error("Invalid register global variable type");
15927 
15928   Register Reg = StringSwitch<Register>(RegName)
15929                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15930                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15931                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15932                      .Default(Register());
15933 
15934   if (Reg)
15935     return Reg;
15936   report_fatal_error("Invalid register name global variable");
15937 }
15938 
15939 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15940   // The 32-bit SVR4 ABI accesses everything as got-indirect.
15941   if (Subtarget.is32BitELFABI())
15942     return true;
15943 
15944   // AIX accesses everything indirectly through the TOC, which is similar to
15945   // the GOT.
15946   if (Subtarget.isAIXABI())
15947     return true;
15948 
15949   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15950   // If it is small or large code model, module locals are accessed
15951   // indirectly by loading their address from .toc/.got.
15952   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15953     return true;
15954 
15955   // JumpTable and BlockAddress are accessed as got-indirect.
15956   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15957     return true;
15958 
15959   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15960     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15961 
15962   return false;
15963 }
15964 
15965 bool
15966 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15967   // The PowerPC target isn't yet aware of offsets.
15968   return false;
15969 }
15970 
15971 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15972                                            const CallInst &I,
15973                                            MachineFunction &MF,
15974                                            unsigned Intrinsic) const {
15975   switch (Intrinsic) {
15976   case Intrinsic::ppc_altivec_lvx:
15977   case Intrinsic::ppc_altivec_lvxl:
15978   case Intrinsic::ppc_altivec_lvebx:
15979   case Intrinsic::ppc_altivec_lvehx:
15980   case Intrinsic::ppc_altivec_lvewx:
15981   case Intrinsic::ppc_vsx_lxvd2x:
15982   case Intrinsic::ppc_vsx_lxvw4x:
15983   case Intrinsic::ppc_vsx_lxvd2x_be:
15984   case Intrinsic::ppc_vsx_lxvw4x_be:
15985   case Intrinsic::ppc_vsx_lxvl:
15986   case Intrinsic::ppc_vsx_lxvll: {
15987     EVT VT;
15988     switch (Intrinsic) {
15989     case Intrinsic::ppc_altivec_lvebx:
15990       VT = MVT::i8;
15991       break;
15992     case Intrinsic::ppc_altivec_lvehx:
15993       VT = MVT::i16;
15994       break;
15995     case Intrinsic::ppc_altivec_lvewx:
15996       VT = MVT::i32;
15997       break;
15998     case Intrinsic::ppc_vsx_lxvd2x:
15999     case Intrinsic::ppc_vsx_lxvd2x_be:
16000       VT = MVT::v2f64;
16001       break;
16002     default:
16003       VT = MVT::v4i32;
16004       break;
16005     }
16006 
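    // These intrinsics may be implemented with aligned vector loads that
    // truncate the address downward, so conservatively describe the access as
    // covering bytes from (addr - (size - 1)) up to (addr + size - 1).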
16007     Info.opc = ISD::INTRINSIC_W_CHAIN;
16008     Info.memVT = VT;
16009     Info.ptrVal = I.getArgOperand(0);
16010     Info.offset = -VT.getStoreSize()+1;
16011     Info.size = 2*VT.getStoreSize()-1;
16012     Info.align = Align(1);
16013     Info.flags = MachineMemOperand::MOLoad;
16014     return true;
16015   }
16016   case Intrinsic::ppc_altivec_stvx:
16017   case Intrinsic::ppc_altivec_stvxl:
16018   case Intrinsic::ppc_altivec_stvebx:
16019   case Intrinsic::ppc_altivec_stvehx:
16020   case Intrinsic::ppc_altivec_stvewx:
16021   case Intrinsic::ppc_vsx_stxvd2x:
16022   case Intrinsic::ppc_vsx_stxvw4x:
16023   case Intrinsic::ppc_vsx_stxvd2x_be:
16024   case Intrinsic::ppc_vsx_stxvw4x_be:
16025   case Intrinsic::ppc_vsx_stxvl:
16026   case Intrinsic::ppc_vsx_stxvll: {
16027     EVT VT;
16028     switch (Intrinsic) {
16029     case Intrinsic::ppc_altivec_stvebx:
16030       VT = MVT::i8;
16031       break;
16032     case Intrinsic::ppc_altivec_stvehx:
16033       VT = MVT::i16;
16034       break;
16035     case Intrinsic::ppc_altivec_stvewx:
16036       VT = MVT::i32;
16037       break;
16038     case Intrinsic::ppc_vsx_stxvd2x:
16039     case Intrinsic::ppc_vsx_stxvd2x_be:
16040       VT = MVT::v2f64;
16041       break;
16042     default:
16043       VT = MVT::v4i32;
16044       break;
16045     }
16046 
16047     Info.opc = ISD::INTRINSIC_VOID;
16048     Info.memVT = VT;
16049     Info.ptrVal = I.getArgOperand(1);
16050     Info.offset = -VT.getStoreSize()+1;
16051     Info.size = 2*VT.getStoreSize()-1;
16052     Info.align = Align(1);
16053     Info.flags = MachineMemOperand::MOStore;
16054     return true;
16055   }
16056   default:
16057     break;
16058   }
16059 
16060   return false;
16061 }
16062 
16063 /// Returns the preferred type for this memory operation, or EVT::Other if the
16064 /// type should be determined using generic target-independent logic.
16065 EVT PPCTargetLowering::getOptimalMemOpType(
16066     const MemOp &Op, const AttributeList &FuncAttributes) const {
16067   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16068     // We should use Altivec/VSX loads and stores when available. For unaligned
16069     // addresses, unaligned VSX loads are only fast starting with the P8.
16070     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16071         (Op.isAligned(Align(16)) ||
16072          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16073       return MVT::v4i32;
16074   }
16075 
16076   if (Subtarget.isPPC64()) {
16077     return MVT::i64;
16078   }
16079 
16080   return MVT::i32;
16081 }
16082 
16083 /// Returns true if it is beneficial to convert a load of a constant
16084 /// to just the constant itself.
16085 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16086                                                           Type *Ty) const {
16087   assert(Ty->isIntegerTy());
16088 
16089   unsigned BitSize = Ty->getPrimitiveSizeInBits();
16090   return !(BitSize == 0 || BitSize > 64);
16091 }
16092 
16093 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16094   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16095     return false;
16096   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16097   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16098   return NumBits1 == 64 && NumBits2 == 32;
16099 }
16100 
16101 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16102   if (!VT1.isInteger() || !VT2.isInteger())
16103     return false;
16104   unsigned NumBits1 = VT1.getSizeInBits();
16105   unsigned NumBits2 = VT2.getSizeInBits();
16106   return NumBits1 == 64 && NumBits2 == 32;
16107 }
16108 
16109 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16110   // Generally speaking, zexts are not free, but they are free when they can be
16111   // folded with other operations.
16112   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16113     EVT MemVT = LD->getMemoryVT();
16114     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16115          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16116         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16117          LD->getExtensionType() == ISD::ZEXTLOAD))
16118       return true;
16119   }
16120 
16121   // FIXME: Add other cases...
16122   //  - 32-bit shifts with a zext to i64
16123   //  - zext after ctlz, bswap, etc.
16124   //  - zext after and by a constant mask
16125 
16126   return TargetLowering::isZExtFree(Val, VT2);
16127 }
16128 
16129 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16130   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16131          "invalid fpext types");
16132   // Extending to float128 is not free.
16133   if (DestVT == MVT::f128)
16134     return false;
16135   return true;
16136 }
16137 
16138 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16139   return isInt<16>(Imm) || isUInt<16>(Imm);
16140 }
16141 
16142 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16143   return isInt<16>(Imm) || isUInt<16>(Imm);
16144 }
16145 
16146 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16147                                                        unsigned,
16148                                                        unsigned,
16149                                                        MachineMemOperand::Flags,
16150                                                        bool *Fast) const {
16151   if (DisablePPCUnaligned)
16152     return false;
16153 
16154   // PowerPC supports unaligned memory access for simple non-vector types.
16155   // Although accessing unaligned addresses is not as efficient as accessing
16156   // aligned addresses, it is generally more efficient than manual expansion,
16157   // and generally only traps to software emulation when crossing page
16158   // boundaries.
16159 
16160   if (!VT.isSimple())
16161     return false;
16162 
16163   if (VT.isFloatingPoint() && !VT.isVector() &&
16164       !Subtarget.allowsUnalignedFPAccess())
16165     return false;
16166 
16167   if (VT.getSimpleVT().isVector()) {
16168     if (Subtarget.hasVSX()) {
16169       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16170           VT != MVT::v4f32 && VT != MVT::v4i32)
16171         return false;
16172     } else {
16173       return false;
16174     }
16175   }
16176 
16177   if (VT == MVT::ppcf128)
16178     return false;
16179 
16180   if (Fast)
16181     *Fast = true;
16182 
16183   return true;
16184 }
16185 
16186 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16187                                                SDValue C) const {
16188   // Check integral scalar types.
16189   if (!VT.isScalarInteger())
16190     return false;
16191   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16192     if (!ConstNode->getAPIntValue().isSignedIntN(64))
16193       return false;
16194     // This transformation will generate >= 2 operations. But the following
16195     // cases will generate <= 2 instructions during ISEL, so exclude them.
16196     // 1. If the constant multiplier fits in 16 bits, it can be handled by one
16197     //    HW instruction, i.e. MULLI.
16198     // 2. If the multiplier fits in 16 bits after its trailing zeros are shifted
16199     //    out, only one extra shift is needed, i.e. MULLI and RLDICR.
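    // For example, multiplying by 10 (MULLI) or by 10 << 20 (MULLI + RLDICR)
    // is left alone, while multiplying by (1 << 20) + 1 is better done as a
    // shift and an add.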
16200     int64_t Imm = ConstNode->getSExtValue();
16201     unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16202     Imm >>= Shift;
16203     if (isInt<16>(Imm))
16204       return false;
16205     uint64_t UImm = static_cast<uint64_t>(Imm);
16206     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16207         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16208       return true;
16209   }
16210   return false;
16211 }
16212 
16213 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16214                                                    EVT VT) const {
16215   return isFMAFasterThanFMulAndFAdd(
16216       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16217 }
16218 
16219 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16220                                                    Type *Ty) const {
16221   switch (Ty->getScalarType()->getTypeID()) {
16222   case Type::FloatTyID:
16223   case Type::DoubleTyID:
16224     return true;
16225   case Type::FP128TyID:
16226     return Subtarget.hasP9Vector();
16227   default:
16228     return false;
16229   }
16230 }
16231 
16232 // FIXME: add more patterns which are not profitable to hoist.
16233 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16234   if (!I->hasOneUse())
16235     return true;
16236 
16237   Instruction *User = I->user_back();
16238   assert(User && "A single use instruction with no uses.");
16239 
16240   switch (I->getOpcode()) {
16241   case Instruction::FMul: {
16242     // Don't break FMA, PowerPC prefers FMA.
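    // For example, hoisting the fmul out of (fadd (fmul a, b), c) would keep
    // the multiply and the add from being fused into a single FMA.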
16243     if (User->getOpcode() != Instruction::FSub &&
16244         User->getOpcode() != Instruction::FAdd)
16245       return true;
16246 
16247     const TargetOptions &Options = getTargetMachine().Options;
16248     const Function *F = I->getFunction();
16249     const DataLayout &DL = F->getParent()->getDataLayout();
16250     Type *Ty = User->getOperand(0)->getType();
16251 
16252     return !(
16253         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16254         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16255         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16256   }
16257   case Instruction::Load: {
    // Don't break the "store (load float*)" pattern; it will be combined
    // to "store (load int32)" in a later InstCombine pass (see function
    // combineLoadToOperationType). On PowerPC, loading a floating point
    // value takes more cycles than loading a 32-bit integer.
16262     LoadInst *LI = cast<LoadInst>(I);
    // For loads that combineLoadToOperationType leaves alone, such as ordered
    // loads, it should be profitable to hoist them.
    // A swifterror load can only be used with pointer-to-pointer types, so the
    // type check below rules out that case.
16267     if (!LI->isUnordered())
16268       return true;
16269 
16270     if (User->getOpcode() != Instruction::Store)
16271       return true;
16272 
16273     if (I->getType()->getTypeID() != Type::FloatTyID)
16274       return true;
16275 
16276     return false;
16277   }
16278   default:
16279     return true;
16280   }
16281   return true;
16282 }
16283 
16284 const MCPhysReg *
16285 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16286   // LR is a callee-save register, but we must treat it as clobbered by any call
16287   // site. Hence we include LR in the scratch registers, which are in turn added
16288   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16289   // to CTR, which is used by any indirect call.
16290   static const MCPhysReg ScratchRegs[] = {
16291     PPC::X12, PPC::LR8, PPC::CTR8, 0
16292   };
16293 
16294   return ScratchRegs;
16295 }
16296 
16297 Register PPCTargetLowering::getExceptionPointerRegister(
16298     const Constant *PersonalityFn) const {
16299   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16300 }
16301 
16302 Register PPCTargetLowering::getExceptionSelectorRegister(
16303     const Constant *PersonalityFn) const {
16304   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16305 }
16306 
16307 bool
16308 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16309                      EVT VT , unsigned DefinedValues) const {
16310   if (VT == MVT::v2i64)
16311     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16312 
16313   if (Subtarget.hasVSX())
16314     return true;
16315 
16316   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16317 }
16318 
16319 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16320   if (DisableILPPref || Subtarget.enableMachineScheduler())
16321     return TargetLowering::getSchedulingPreference(N);
16322 
16323   return Sched::ILP;
16324 }
16325 
16326 // Create a fast isel object.
16327 FastISel *
16328 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16329                                   const TargetLibraryInfo *LibInfo) const {
16330   return PPC::createFastISel(FuncInfo, LibInfo);
16331 }
16332 
16333 // 'Inverted' means the FMA opcode after negating one multiplicand.
16334 // For example, (fma -a b c) = (fnmsub a b c)
16335 static unsigned invertFMAOpcode(unsigned Opc) {
16336   switch (Opc) {
16337   default:
16338     llvm_unreachable("Invalid FMA opcode for PowerPC!");
16339   case ISD::FMA:
16340     return PPCISD::FNMSUB;
16341   case PPCISD::FNMSUB:
16342     return ISD::FMA;
16343   }
16344 }
16345 
16346 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16347                                                 bool LegalOps, bool OptForSize,
16348                                                 NegatibleCost &Cost,
16349                                                 unsigned Depth) const {
16350   if (Depth > SelectionDAG::MaxRecursionDepth)
16351     return SDValue();
16352 
16353   unsigned Opc = Op.getOpcode();
16354   EVT VT = Op.getValueType();
16355   SDNodeFlags Flags = Op.getNode()->getFlags();
16356 
16357   switch (Opc) {
16358   case PPCISD::FNMSUB:
16359     if (!Op.hasOneUse() || !isTypeLegal(VT))
16360       break;
16361 
16362     const TargetOptions &Options = getTargetMachine().Options;
16363     SDValue N0 = Op.getOperand(0);
16364     SDValue N1 = Op.getOperand(1);
16365     SDValue N2 = Op.getOperand(2);
16366     SDLoc Loc(Op);
16367 
16368     NegatibleCost N2Cost = NegatibleCost::Expensive;
16369     SDValue NegN2 =
16370         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16371 
16372     if (!NegN2)
16373       return SDValue();
16374 
16375     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16376     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change the sign of zero. For example,
    // -(-ab-(-c)) = -0 while -(-(ab-c)) = +0 when a = b = c = 1.
16379     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16380       // Try and choose the cheaper one to negate.
16381       NegatibleCost N0Cost = NegatibleCost::Expensive;
16382       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16383                                            N0Cost, Depth + 1);
16384 
16385       NegatibleCost N1Cost = NegatibleCost::Expensive;
16386       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16387                                            N1Cost, Depth + 1);
16388 
16389       if (NegN0 && N0Cost <= N1Cost) {
16390         Cost = std::min(N0Cost, N2Cost);
16391         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16392       } else if (NegN1) {
16393         Cost = std::min(N1Cost, N2Cost);
16394         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16395       }
16396     }
16397 
16398     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16399     if (isOperationLegal(ISD::FMA, VT)) {
16400       Cost = N2Cost;
16401       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16402     }
16403 
16404     break;
16405   }
16406 
16407   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16408                                               Cost, Depth);
16409 }
16410 
16411 // Override to enable LOAD_STACK_GUARD lowering on Linux.
16412 bool PPCTargetLowering::useLoadStackGuardNode() const {
16413   if (!Subtarget.isTargetLinux())
16414     return TargetLowering::useLoadStackGuardNode();
16415   return true;
16416 }
16417 
// Override so that no stack protector guard variable is declared on Linux,
// where the guard is loaded via LOAD_STACK_GUARD instead.
16419 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16420   if (!Subtarget.isTargetLinux())
16421     return TargetLowering::insertSSPDeclarations(M);
16422 }
16423 
16424 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16425                                      bool ForCodeSize) const {
16426   if (!VT.isSimple() || !Subtarget.hasVSX())
16427     return false;
16428 
16429   switch(VT.getSimpleVT().SimpleTy) {
16430   default:
    // For FP types that are currently not supported by the PPC backend, return
    // false. Examples: f16, f80.
16433     return false;
16434   case MVT::f32:
16435   case MVT::f64:
16436     if (Subtarget.hasPrefixInstrs()) {
16437       // With prefixed instructions, we can materialize anything that can be
16438       // represented with a 32-bit immediate, not just positive zero.
16439       APFloat APFloatOfImm = Imm;
16440       return convertToNonDenormSingle(APFloatOfImm);
16441     }
16442     LLVM_FALLTHROUGH;
16443   case MVT::ppcf128:
16444     return Imm.isPosZero();
16445   }
16446 }
16447 
16448 // For vector shift operation op, fold
16449 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
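// The mask is redundant because the corresponding PPC vector shift
// instructions only use the shift amount modulo the element width.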
16450 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16451                                   SelectionDAG &DAG) {
16452   SDValue N0 = N->getOperand(0);
16453   SDValue N1 = N->getOperand(1);
16454   EVT VT = N0.getValueType();
16455   unsigned OpSizeInBits = VT.getScalarSizeInBits();
16456   unsigned Opcode = N->getOpcode();
16457   unsigned TargetOpcode;
16458 
16459   switch (Opcode) {
16460   default:
16461     llvm_unreachable("Unexpected shift operation");
16462   case ISD::SHL:
16463     TargetOpcode = PPCISD::SHL;
16464     break;
16465   case ISD::SRL:
16466     TargetOpcode = PPCISD::SRL;
16467     break;
16468   case ISD::SRA:
16469     TargetOpcode = PPCISD::SRA;
16470     break;
16471   }
16472 
16473   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16474       N1->getOpcode() == ISD::AND)
16475     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16476       if (Mask->getZExtValue() == OpSizeInBits - 1)
16477         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16478 
16479   return SDValue();
16480 }
16481 
16482 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16483   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16484     return Value;
16485 
16486   SDValue N0 = N->getOperand(0);
16487   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
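  // Fold (shl (sext i32 X), C) into a single EXTSWSLI (extswsli) node. This
  // requires a 64-bit ISA 3.0 subtarget and an i64 result.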
16488   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16489       N0.getOpcode() != ISD::SIGN_EXTEND ||
16490       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16491       N->getValueType(0) != MVT::i64)
16492     return SDValue();
16493 
16494   // We can't save an operation here if the value is already extended, and
16495   // the existing shift is easier to combine.
16496   SDValue ExtsSrc = N0.getOperand(0);
16497   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16498       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16499     return SDValue();
16500 
16501   SDLoc DL(N0);
16502   SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswsli, but the shift amount
  // could be i64.
16505   if (ShiftBy.getValueType() == MVT::i64)
16506     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16507 
16508   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16509                          ShiftBy);
16510 }
16511 
16512 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16513   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16514     return Value;
16515 
16516   return SDValue();
16517 }
16518 
16519 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16520   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16521     return Value;
16522 
16523   return SDValue();
16524 }
16525 
16526 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16527 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the expression (addi Z, -C) simplifies to Z.
// Requirement: -C is in [-32768, 32767], and X and Z have type MVT::i64.
16530 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16531                                  const PPCSubtarget &Subtarget) {
16532   if (!Subtarget.isPPC64())
16533     return SDValue();
16534 
16535   SDValue LHS = N->getOperand(0);
16536   SDValue RHS = N->getOperand(1);
16537 
16538   auto isZextOfCompareWithConstant = [](SDValue Op) {
16539     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16540         Op.getValueType() != MVT::i64)
16541       return false;
16542 
16543     SDValue Cmp = Op.getOperand(0);
16544     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16545         Cmp.getOperand(0).getValueType() != MVT::i64)
16546       return false;
16547 
16548     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16549       int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the immediate range of the addi instruction,
      // -C is required to be in [-32768, 32767].
16552       return isInt<16>(NegConstant);
16553     }
16554 
16555     return false;
16556   };
16557 
16558   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16559   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16560 
16561   // If there is a pattern, canonicalize a zext operand to the RHS.
16562   if (LHSHasPattern && !RHSHasPattern)
16563     std::swap(LHS, RHS);
16564   else if (!LHSHasPattern && !RHSHasPattern)
16565     return SDValue();
16566 
16567   SDLoc DL(N);
16568   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16569   SDValue Cmp = RHS.getOperand(0);
16570   SDValue Z = Cmp.getOperand(0);
16571   auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16572 
  assert(Constant && "Constant should not be a null pointer.");
16574   int64_t NegConstant = 0 - Constant->getSExtValue();
16575 
16576   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16577   default: break;
16578   case ISD::SETNE: {
16579     //                                 when C == 0
16580     //                             --> addze X, (addic Z, -1).carry
16581     //                            /
16582     // add X, (zext(setne Z, C))--
16583     //                            \    when -32768 <= -C <= 32767 && C != 0
16584     //                             --> addze X, (addic (addi Z, -C), -1).carry
16585     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16586                               DAG.getConstant(NegConstant, DL, MVT::i64));
16587     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16588     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16589                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16590     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16591                        SDValue(Addc.getNode(), 1));
16592     }
16593   case ISD::SETEQ: {
16594     //                                 when C == 0
16595     //                             --> addze X, (subfic Z, 0).carry
16596     //                            /
16597     // add X, (zext(sete  Z, C))--
16598     //                            \    when -32768 <= -C <= 32767 && C != 0
16599     //                             --> addze X, (subfic (addi Z, -C), 0).carry
16600     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16601                               DAG.getConstant(NegConstant, DL, MVT::i64));
16602     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16603     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16604                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16605     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16606                        SDValue(Subc.getNode(), 1));
16607     }
16608   }
16609 
16610   return SDValue();
16611 }
16612 
16613 // Transform
16614 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16615 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16616 // In this case both C1 and C2 must be known constants.
// C1+C2 must fit into a 34-bit signed integer.
16618 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16619                                           const PPCSubtarget &Subtarget) {
16620   if (!Subtarget.isUsingPCRelativeCalls())
16621     return SDValue();
16622 
16623   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
  // If we find that node, try to cast the global address and the constant.
16625   SDValue LHS = N->getOperand(0);
16626   SDValue RHS = N->getOperand(1);
16627 
16628   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16629     std::swap(LHS, RHS);
16630 
16631   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16632     return SDValue();
16633 
16634   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16635   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16636   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16637 
16638   // Check that both casts succeeded.
16639   if (!GSDN || !ConstNode)
16640     return SDValue();
16641 
16642   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16643   SDLoc DL(GSDN);
16644 
16645   // The signed int offset needs to fit in 34 bits.
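  // (MAT_PCREL_ADDR is materialized with a prefixed paddi, whose signed
  // immediate field is 34 bits wide.)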
16646   if (!isInt<34>(NewOffset))
16647     return SDValue();
16648 
16649   // The new global address is a copy of the old global address except
16650   // that it has the updated Offset.
16651   SDValue GA =
16652       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16653                                  NewOffset, GSDN->getTargetFlags());
16654   SDValue MatPCRel =
16655       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16656   return MatPCRel;
16657 }
16658 
16659 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16660   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16661     return Value;
16662 
16663   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16664     return Value;
16665 
16666   return SDValue();
16667 }
16668 
16669 // Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128-bit float.
16672 // This can be of two forms:
16673 // 1) BITCAST of f128 feeding TRUNCATE
16674 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is that we do not have a legal i128 type, so
// we want to avoid having to store the f128 and then reload part of it.
16678 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16679                                            DAGCombinerInfo &DCI) const {
16680   // If we are using CRBits then try that first.
16681   if (Subtarget.useCRBits()) {
16682     // Check if CRBits did anything and return that if it did.
16683     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16684       return CRTruncValue;
16685   }
16686 
16687   SDLoc dl(N);
16688   SDValue Op0 = N->getOperand(0);
16689 
16690   // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16691   if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16692     EVT VT = N->getValueType(0);
16693     if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16694       return SDValue();
16695     SDValue Sub = Op0.getOperand(0);
16696     if (Sub.getOpcode() == ISD::SUB) {
16697       SDValue SubOp0 = Sub.getOperand(0);
16698       SDValue SubOp1 = Sub.getOperand(1);
16699       if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16700           (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16701         return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16702                                SubOp1.getOperand(0),
16703                                DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16704       }
16705     }
16706   }
16707 
16708   // Looking for a truncate of i128 to i64.
16709   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16710     return SDValue();
16711 
16712   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
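  // A plain truncate takes the low 64 bits, which live in element 1 of the
  // v2i64 bitcast on big-endian targets and element 0 on little-endian ones.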
16713 
16714   // SRL feeding TRUNCATE.
16715   if (Op0.getOpcode() == ISD::SRL) {
16716     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16717     // The right shift has to be by 64 bits.
16718     if (!ConstNode || ConstNode->getZExtValue() != 64)
16719       return SDValue();
16720 
16721     // Switch the element number to extract.
16722     EltToExtract = EltToExtract ? 0 : 1;
16723     // Update Op0 past the SRL.
16724     Op0 = Op0.getOperand(0);
16725   }
16726 
16727   // BITCAST feeding a TRUNCATE possibly via SRL.
16728   if (Op0.getOpcode() == ISD::BITCAST &&
16729       Op0.getValueType() == MVT::i128 &&
16730       Op0.getOperand(0).getValueType() == MVT::f128) {
16731     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16732     return DCI.DAG.getNode(
16733         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16734         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16735   }
16736   return SDValue();
16737 }
16738 
16739 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16740   SelectionDAG &DAG = DCI.DAG;
16741 
16742   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16743   if (!ConstOpOrElement)
16744     return SDValue();
16745 
  // An imul is usually smaller than the alternative sequence for a legal type.
16747   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16748       isOperationLegal(ISD::MUL, N->getValueType(0)))
16749     return SDValue();
16750 
16751   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16752     switch (this->Subtarget.getCPUDirective()) {
16753     default:
      // TODO: enhance the condition for subtargets before pwr8
16755       return false;
16756     case PPC::DIR_PWR8:
16757       //  type        mul     add    shl
16758       // scalar        4       1      1
16759       // vector        7       2      2
16760       return true;
16761     case PPC::DIR_PWR9:
16762     case PPC::DIR_PWR10:
16763     case PPC::DIR_PWR_FUTURE:
16764       //  type        mul     add    shl
16765       // scalar        5       2      2
16766       // vector        7       2      2
16767 
      // The cycle counts of the relevant operations are shown in the table
      // above. Since mul costs 5 (scalar) / 7 (vector) cycles while add, sub
      // and shl each cost 2 for both scalar and vector types, the 2-instr
      // patterns (add/sub + shl, 4 cycles) are always profitable. For the
      // 3-instr pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x),
      // sub + add + shl cost 6 cycles, so only do it for vector types.
16774       return IsAddOne && IsNeg ? VT.isVector() : true;
16775     }
16776   };
16777 
16778   EVT VT = N->getValueType(0);
16779   SDLoc DL(N);
16780 
16781   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16782   bool IsNeg = MulAmt.isNegative();
16783   APInt MulAmtAbs = MulAmt.abs();
16784 
16785   if ((MulAmtAbs - 1).isPowerOf2()) {
16786     // (mul x, 2^N + 1) => (add (shl x, N), x)
16787     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
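    // e.g. MulAmt = 5: MulAmtAbs - 1 = 4 is a power of two, so this becomes
    // (add (shl x, 2), x).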
16788 
16789     if (!IsProfitable(IsNeg, true, VT))
16790       return SDValue();
16791 
16792     SDValue Op0 = N->getOperand(0);
16793     SDValue Op1 =
16794         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16795                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16796     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16797 
16798     if (!IsNeg)
16799       return Res;
16800 
16801     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16802   } else if ((MulAmtAbs + 1).isPowerOf2()) {
16803     // (mul x, 2^N - 1) => (sub (shl x, N), x)
16804     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
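    // e.g. MulAmt = 7: MulAmtAbs + 1 = 8 is a power of two, so this becomes
    // (sub (shl x, 3), x).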
16805 
16806     if (!IsProfitable(IsNeg, false, VT))
16807       return SDValue();
16808 
16809     SDValue Op0 = N->getOperand(0);
16810     SDValue Op1 =
16811         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16812                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16813 
16814     if (!IsNeg)
16815       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16816     else
16817       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16818 
16819   } else {
16820     return SDValue();
16821   }
16822 }
16823 
// Combine FMA-like ops (such as fnmsub) with fnegs into the appropriate op. Do
// this in the combiner since we need to check SDNode flags and other subtarget
// features.
16826 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16827                                           DAGCombinerInfo &DCI) const {
16828   SDValue N0 = N->getOperand(0);
16829   SDValue N1 = N->getOperand(1);
16830   SDValue N2 = N->getOperand(2);
16831   SDNodeFlags Flags = N->getFlags();
16832   EVT VT = N->getValueType(0);
16833   SelectionDAG &DAG = DCI.DAG;
16834   const TargetOptions &Options = getTargetMachine().Options;
16835   unsigned Opc = N->getOpcode();
16836   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16837   bool LegalOps = !DCI.isBeforeLegalizeOps();
16838   SDLoc Loc(N);
16839 
16840   if (!isOperationLegal(ISD::FMA, VT))
16841     return SDValue();
16842 
  // Allowing the transformation to FNMSUB may change the sign of zero when
  // ab-c=0, since (fnmsub a b c) = -0 while c-ab = +0.
16845   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16846     return SDValue();
16847 
16848   // (fma (fneg a) b c) => (fnmsub a b c)
16849   // (fnmsub (fneg a) b c) => (fma a b c)
16850   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16851     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16852 
16853   // (fma a (fneg b) c) => (fnmsub a b c)
16854   // (fnmsub a (fneg b) c) => (fma a b c)
16855   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16856     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16857 
16858   return SDValue();
16859 }
16860 
16861 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
16863   if (!Subtarget.is64BitELFABI())
16864     return false;
16865 
16866   // If not a tail call then no need to proceed.
16867   if (!CI->isTailCall())
16868     return false;
16869 
  // If sibling calls have been disabled and tail-calls aren't guaranteed,
  // there is no reason to duplicate.
16872   auto &TM = getTargetMachine();
16873   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16874     return false;
16875 
16876   // Can't tail call a function called indirectly, or if it has variadic args.
16877   const Function *Callee = CI->getCalledFunction();
16878   if (!Callee || Callee->isVarArg())
16879     return false;
16880 
16881   // Make sure the callee and caller calling conventions are eligible for tco.
16882   const Function *Caller = CI->getParent()->getParent();
16883   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16884                                            CI->getCallingConv()))
16885       return false;
16886 
  // If the function is local then we have a good chance at tail-calling it.
16888   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16889 }
16890 
16891 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16892   if (!Subtarget.hasVSX())
16893     return false;
16894   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16895     return true;
16896   return VT == MVT::f32 || VT == MVT::f64 ||
16897     VT == MVT::v4f32 || VT == MVT::v2f64;
16898 }
16899 
16900 bool PPCTargetLowering::
16901 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16902   const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis., we should sink the and.
16904   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16905     // Can't handle constants wider than 64-bits.
16906     if (CI->getBitWidth() > 64)
16907       return false;
16908     int64_t ConstVal = CI->getZExtValue();
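    // andi. takes an unsigned 16-bit immediate; andis. takes an unsigned
    // 16-bit immediate shifted left by 16 bits.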
16909     return isUInt<16>(ConstVal) ||
16910       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16911   }
16912 
16913   // For non-constant masks, we can always use the record-form and.
16914   return true;
16915 }
16916 
16917 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16918 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16919 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16920 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b)) to (vabsd a b 1) if a and b are of type v4i32
16922 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16923   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16924   assert(Subtarget.hasP9Altivec() &&
16925          "Only combine this when P9 altivec supported!");
16926   EVT VT = N->getValueType(0);
16927   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16928     return SDValue();
16929 
16930   SelectionDAG &DAG = DCI.DAG;
16931   SDLoc dl(N);
16932   if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, the difference is known to be non-negative (as
    // a signed integer) because both inputs are zero-extended.
16935     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16936     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16937     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16938          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16939         (SubOpcd1 == ISD::ZERO_EXTEND ||
16940          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16941       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16942                          N->getOperand(0)->getOperand(0),
16943                          N->getOperand(0)->getOperand(1),
16944                          DAG.getTargetConstant(0, dl, MVT::i32));
16945     }
16946 
16947     // For type v4i32, it can be optimized with xvnegsp + vabsduw
16948     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16949         N->getOperand(0).hasOneUse()) {
16950       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16951                          N->getOperand(0)->getOperand(0),
16952                          N->getOperand(0)->getOperand(1),
16953                          DAG.getTargetConstant(1, dl, MVT::i32));
16954     }
16955   }
16956 
16957   return SDValue();
16958 }
16959 
// For types v4i32/v8i16/v16i8, transform
16961 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16962 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16963 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16964 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16965 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16966                                           DAGCombinerInfo &DCI) const {
16967   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16968   assert(Subtarget.hasP9Altivec() &&
16969          "Only combine this when P9 altivec supported!");
16970 
16971   SelectionDAG &DAG = DCI.DAG;
16972   SDLoc dl(N);
16973   SDValue Cond = N->getOperand(0);
16974   SDValue TrueOpnd = N->getOperand(1);
16975   SDValue FalseOpnd = N->getOperand(2);
16976   EVT VT = N->getOperand(1).getValueType();
16977 
16978   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16979       FalseOpnd.getOpcode() != ISD::SUB)
16980     return SDValue();
16981 
  // ABSD is only available for types v4i32/v8i16/v16i8.
16983   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16984     return SDValue();
16985 
  // Require at least one single-use operand so the combine saves at least one
  // dependent computation.
16987   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16988     return SDValue();
16989 
16990   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16991 
16992   // Can only handle unsigned comparison here
16993   switch (CC) {
16994   default:
16995     return SDValue();
16996   case ISD::SETUGT:
16997   case ISD::SETUGE:
16998     break;
16999   case ISD::SETULT:
17000   case ISD::SETULE:
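    // For the less-than forms, swapping the operands gives the same pattern as
    // the greater-than forms.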
17001     std::swap(TrueOpnd, FalseOpnd);
17002     break;
17003   }
17004 
17005   SDValue CmpOpnd1 = Cond.getOperand(0);
17006   SDValue CmpOpnd2 = Cond.getOperand(1);
17007 
17008   // SETCC CmpOpnd1 CmpOpnd2 cond
17009   // TrueOpnd = CmpOpnd1 - CmpOpnd2
17010   // FalseOpnd = CmpOpnd2 - CmpOpnd1
17011   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17012       TrueOpnd.getOperand(1) == CmpOpnd2 &&
17013       FalseOpnd.getOperand(0) == CmpOpnd2 &&
17014       FalseOpnd.getOperand(1) == CmpOpnd1) {
17015     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17016                        CmpOpnd1, CmpOpnd2,
17017                        DAG.getTargetConstant(0, dl, MVT::i32));
17018   }
17019 
17020   return SDValue();
17021 }
17022