1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 static cl::opt<bool> EnablePPCPCRelTLS(
125     "enable-ppc-pcrel-tls",
126     cl::desc("enable the use of PC relative memops in TLS instructions on PPC"),
127     cl::Hidden);
128 
// Counters reported under -stats; each is keyed by DEBUG_TYPE
// ("ppc-lowering") and incremented by the lowering code later in this file.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133 
// Forward declarations for file-local helpers whose definitions appear
// later in this file.

// Returns true if the shuffle mask selects whole N-byte elements; see the
// definition later in this file for the exact contract.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

// Widens a vector value to a wider vector type; defined later in this file.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
// Declared extern: the flag is defined in another PPC backend translation
// unit (presumably the ISel DAG-to-DAG pass — TODO confirm). When set, this
// file customizes i1 TRUNCATE lowering to work around an ANDI glue bug.
extern cl::opt<bool> ANDIGlueBug;
140 
141 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142                                      const PPCSubtarget &STI)
143     : TargetLowering(TM), Subtarget(STI) {
144   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145   // arguments are at least 4/8 bytes aligned.
146   bool isPPC64 = Subtarget.isPPC64();
147   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148 
149   // Set up the register classes.
150   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151   if (!useSoftFloat()) {
152     if (hasSPE()) {
153       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155     } else {
156       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158     }
159   }
160 
161   // Match BITREVERSE to customized fast code sequence in the td file.
162   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164 
165   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
166   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167 
168   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169   for (MVT VT : MVT::integer_valuetypes()) {
170     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172   }
173 
174   if (Subtarget.isISA3_0()) {
175     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179   } else {
180     // No extending loads from f16 or HW conversions back and forth.
181     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189   }
190 
191   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192 
193   // PowerPC has pre-inc load and store's.
194   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
204   if (!Subtarget.hasSPE()) {
205     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209   }
210 
211   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213   for (MVT VT : ScalarIntVTs) {
214     setOperationAction(ISD::ADDC, VT, Legal);
215     setOperationAction(ISD::ADDE, VT, Legal);
216     setOperationAction(ISD::SUBC, VT, Legal);
217     setOperationAction(ISD::SUBE, VT, Legal);
218   }
219 
220   if (Subtarget.useCRBits()) {
221     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222 
223     if (isPPC64 || Subtarget.hasFPCVT()) {
224       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
225       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
226                          isPPC64 ? MVT::i64 : MVT::i32);
227       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
228       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
229                         isPPC64 ? MVT::i64 : MVT::i32);
230     } else {
231       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
232       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
233     }
234 
235     // PowerPC does not support direct load/store of condition registers.
236     setOperationAction(ISD::LOAD, MVT::i1, Custom);
237     setOperationAction(ISD::STORE, MVT::i1, Custom);
238 
239     // FIXME: Remove this once the ANDI glue bug is fixed:
240     if (ANDIGlueBug)
241       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
242 
243     for (MVT VT : MVT::integer_valuetypes()) {
244       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
245       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
246       setTruncStoreAction(VT, MVT::i1, Expand);
247     }
248 
249     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
250   }
251 
252   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
253   // PPC (the libcall is not available).
254   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
255   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
256 
257   // We do not currently implement these libm ops for PowerPC.
258   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
259   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
260   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
261   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
262   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
263   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
264 
265   // PowerPC has no SREM/UREM instructions unless we are on P9
266   // On P9 we may use a hardware instruction to compute the remainder.
267   // When the result of both the remainder and the division is required it is
268   // more efficient to compute the remainder from the result of the division
269   // rather than use the remainder instruction. The instructions are legalized
270   // directly because the DivRemPairsPass performs the transformation at the IR
271   // level.
272   if (Subtarget.isISA3_0()) {
273     setOperationAction(ISD::SREM, MVT::i32, Legal);
274     setOperationAction(ISD::UREM, MVT::i32, Legal);
275     setOperationAction(ISD::SREM, MVT::i64, Legal);
276     setOperationAction(ISD::UREM, MVT::i64, Legal);
277   } else {
278     setOperationAction(ISD::SREM, MVT::i32, Expand);
279     setOperationAction(ISD::UREM, MVT::i32, Expand);
280     setOperationAction(ISD::SREM, MVT::i64, Expand);
281     setOperationAction(ISD::UREM, MVT::i64, Expand);
282   }
283 
284   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
285   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
286   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
287   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
288   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
289   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
290   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
291   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
292   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
293 
294   // Handle constrained floating-point operations of scalar.
295   // TODO: Handle SPE specific operation.
296   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
297   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
298   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
299   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
300   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
301   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
302 
303   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
304   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
305   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
306   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
307   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
308   if (Subtarget.hasVSX())
309     setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Legal);
310 
311   if (Subtarget.hasFSQRT()) {
312     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
313     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
314   }
315 
316   if (Subtarget.hasFPRND()) {
317     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
318     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
319     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
320     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
321 
322     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
323     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
324     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
325     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
326   }
327 
328   // We don't support sin/cos/sqrt/fmod/pow
329   setOperationAction(ISD::FSIN , MVT::f64, Expand);
330   setOperationAction(ISD::FCOS , MVT::f64, Expand);
331   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
332   setOperationAction(ISD::FREM , MVT::f64, Expand);
333   setOperationAction(ISD::FPOW , MVT::f64, Expand);
334   setOperationAction(ISD::FSIN , MVT::f32, Expand);
335   setOperationAction(ISD::FCOS , MVT::f32, Expand);
336   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
337   setOperationAction(ISD::FREM , MVT::f32, Expand);
338   setOperationAction(ISD::FPOW , MVT::f32, Expand);
339   if (Subtarget.hasSPE()) {
340     setOperationAction(ISD::FMA  , MVT::f64, Expand);
341     setOperationAction(ISD::FMA  , MVT::f32, Expand);
342   } else {
343     setOperationAction(ISD::FMA  , MVT::f64, Legal);
344     setOperationAction(ISD::FMA  , MVT::f32, Legal);
345   }
346 
347   if (Subtarget.hasSPE())
348     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
349 
350   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
351 
352   // If we're enabling GP optimizations, use hardware square root
353   if (!Subtarget.hasFSQRT() &&
354       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
355         Subtarget.hasFRE()))
356     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
357 
358   if (!Subtarget.hasFSQRT() &&
359       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
360         Subtarget.hasFRES()))
361     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
362 
363   if (Subtarget.hasFCPSGN()) {
364     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
365     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
366   } else {
367     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
368     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
369   }
370 
371   if (Subtarget.hasFPRND()) {
372     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
373     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
374     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
375     setOperationAction(ISD::FROUND, MVT::f64, Legal);
376 
377     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
378     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
379     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
380     setOperationAction(ISD::FROUND, MVT::f32, Legal);
381   }
382 
383   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
384   // to speed up scalar BSWAP64.
385   // CTPOP or CTTZ were introduced in P8/P9 respectively
386   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
387   if (Subtarget.hasP9Vector())
388     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
389   else
390     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
391   if (Subtarget.isISA3_0()) {
392     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
393     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
394   } else {
395     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
396     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
397   }
398 
399   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
400     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
401     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
402   } else {
403     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
404     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
405   }
406 
407   // PowerPC does not have ROTR
408   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
409   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
410 
411   if (!Subtarget.useCRBits()) {
412     // PowerPC does not have Select
413     setOperationAction(ISD::SELECT, MVT::i32, Expand);
414     setOperationAction(ISD::SELECT, MVT::i64, Expand);
415     setOperationAction(ISD::SELECT, MVT::f32, Expand);
416     setOperationAction(ISD::SELECT, MVT::f64, Expand);
417   }
418 
419   // PowerPC wants to turn select_cc of FP into fsel when possible.
420   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
421   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
422 
423   // PowerPC wants to optimize integer setcc a bit
424   if (!Subtarget.useCRBits())
425     setOperationAction(ISD::SETCC, MVT::i32, Custom);
426 
427   if (Subtarget.hasFPU()) {
428     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
429     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
430     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
431 
432     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
433     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
434     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
435   }
436 
437   // PowerPC does not have BRCOND which requires SetCC
438   if (!Subtarget.useCRBits())
439     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
440 
441   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
442 
443   if (Subtarget.hasSPE()) {
444     // SPE has built-in conversions
445     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
446     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
447     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
448     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
449     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
450     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
451   } else {
452     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
453     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
454     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
455 
456     // PowerPC does not have [U|S]INT_TO_FP
457     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
458     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
459   }
460 
461   if (Subtarget.hasDirectMove() && isPPC64) {
462     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
463     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
464     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
465     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
466     if (TM.Options.UnsafeFPMath) {
467       setOperationAction(ISD::LRINT, MVT::f64, Legal);
468       setOperationAction(ISD::LRINT, MVT::f32, Legal);
469       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
470       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
471       setOperationAction(ISD::LROUND, MVT::f64, Legal);
472       setOperationAction(ISD::LROUND, MVT::f32, Legal);
473       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
474       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
475     }
476   } else {
477     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
478     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
479     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
480     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
481   }
482 
483   // We cannot sextinreg(i1).  Expand to shifts.
484   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
485 
486   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
487   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
488   // support continuation, user-level threading, and etc.. As a result, no
489   // other SjLj exception interfaces are implemented and please don't build
490   // your own exception handling based on them.
491   // LLVM/Clang supports zero-cost DWARF exception handling.
492   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
493   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
494 
495   // We want to legalize GlobalAddress and ConstantPool nodes into the
496   // appropriate instructions to materialize the address.
497   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
498   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
499   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
500   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
501   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
502   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
503   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
504   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
505   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
506   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
507 
508   // TRAP is legal.
509   setOperationAction(ISD::TRAP, MVT::Other, Legal);
510 
511   // TRAMPOLINE is custom lowered.
512   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
513   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
514 
515   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
516   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
517 
518   if (Subtarget.is64BitELFABI()) {
519     // VAARG always uses double-word chunks, so promote anything smaller.
520     setOperationAction(ISD::VAARG, MVT::i1, Promote);
521     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
522     setOperationAction(ISD::VAARG, MVT::i8, Promote);
523     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
524     setOperationAction(ISD::VAARG, MVT::i16, Promote);
525     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
526     setOperationAction(ISD::VAARG, MVT::i32, Promote);
527     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
528     setOperationAction(ISD::VAARG, MVT::Other, Expand);
529   } else if (Subtarget.is32BitELFABI()) {
530     // VAARG is custom lowered with the 32-bit SVR4 ABI.
531     setOperationAction(ISD::VAARG, MVT::Other, Custom);
532     setOperationAction(ISD::VAARG, MVT::i64, Custom);
533   } else
534     setOperationAction(ISD::VAARG, MVT::Other, Expand);
535 
536   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
537   if (Subtarget.is32BitELFABI())
538     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
539   else
540     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
541 
542   // Use the default implementation.
543   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
544   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
545   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
546   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
547   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
548   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
549   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
550   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
551   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
552 
553   // We want to custom lower some of our intrinsics.
554   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
555 
556   // To handle counter-based loop conditions.
557   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
558 
559   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
560   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
561   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
562   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
563 
564   // Comparisons that require checking two conditions.
565   if (Subtarget.hasSPE()) {
566     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
567     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
568     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
569     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
570   }
571   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
572   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
573   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
574   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
575   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
576   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
577   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
578   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
579   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
580   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
581   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
582   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
583 
584   if (Subtarget.has64BitSupport()) {
585     // They also have instructions for converting between i64 and fp.
586     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
587     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
588     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
589     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
590     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
591     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
592     // This is just the low 32 bits of a (signed) fp->i64 conversion.
593     // We cannot do this with Promote because i64 is not a legal type.
594     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
595     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
596 
597     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
598       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
599   } else {
600     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
601     if (Subtarget.hasSPE()) {
602       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
603       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
604     } else {
605       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
606       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
607     }
608   }
609 
610   // With the instructions enabled under FPCVT, we can do everything.
611   if (Subtarget.hasFPCVT()) {
612     if (Subtarget.has64BitSupport()) {
613       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
614       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
615       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
616       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
617       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
618       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
619     }
620 
621     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
622     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
623     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
624     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
625     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
626     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
627   }
628 
629   if (Subtarget.use64BitRegs()) {
630     // 64-bit PowerPC implementations can support i64 types directly
631     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
632     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
633     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
634     // 64-bit PowerPC wants to expand i128 shifts itself.
635     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
636     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
637     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
638   } else {
639     // 32-bit PowerPC wants to expand i64 shifts itself.
640     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
641     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
642     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
643   }
644 
645   // PowerPC has better expansions for funnel shifts than the generic
646   // TargetLowering::expandFunnelShift.
647   if (Subtarget.has64BitSupport()) {
648     setOperationAction(ISD::FSHL, MVT::i64, Custom);
649     setOperationAction(ISD::FSHR, MVT::i64, Custom);
650   }
651   setOperationAction(ISD::FSHL, MVT::i32, Custom);
652   setOperationAction(ISD::FSHR, MVT::i32, Custom);
653 
654   if (Subtarget.hasVSX()) {
655     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
656     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
657     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
658     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
659   }
660 
661   if (Subtarget.hasAltivec()) {
662     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
663       setOperationAction(ISD::SADDSAT, VT, Legal);
664       setOperationAction(ISD::SSUBSAT, VT, Legal);
665       setOperationAction(ISD::UADDSAT, VT, Legal);
666       setOperationAction(ISD::USUBSAT, VT, Legal);
667     }
668     // First set operation action for all vector types to expand. Then we
669     // will selectively turn on ones that can be effectively codegen'd.
670     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
671       // add/sub are legal for all supported vector VT's.
672       setOperationAction(ISD::ADD, VT, Legal);
673       setOperationAction(ISD::SUB, VT, Legal);
674 
675       // For v2i64, these are only valid with P8Vector. This is corrected after
676       // the loop.
677       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
678         setOperationAction(ISD::SMAX, VT, Legal);
679         setOperationAction(ISD::SMIN, VT, Legal);
680         setOperationAction(ISD::UMAX, VT, Legal);
681         setOperationAction(ISD::UMIN, VT, Legal);
682       }
683       else {
684         setOperationAction(ISD::SMAX, VT, Expand);
685         setOperationAction(ISD::SMIN, VT, Expand);
686         setOperationAction(ISD::UMAX, VT, Expand);
687         setOperationAction(ISD::UMIN, VT, Expand);
688       }
689 
690       if (Subtarget.hasVSX()) {
691         setOperationAction(ISD::FMAXNUM, VT, Legal);
692         setOperationAction(ISD::FMINNUM, VT, Legal);
693       }
694 
695       // Vector instructions introduced in P8
696       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
697         setOperationAction(ISD::CTPOP, VT, Legal);
698         setOperationAction(ISD::CTLZ, VT, Legal);
699       }
700       else {
701         setOperationAction(ISD::CTPOP, VT, Expand);
702         setOperationAction(ISD::CTLZ, VT, Expand);
703       }
704 
705       // Vector instructions introduced in P9
706       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
707         setOperationAction(ISD::CTTZ, VT, Legal);
708       else
709         setOperationAction(ISD::CTTZ, VT, Expand);
710 
711       // We promote all shuffles to v16i8.
712       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
713       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
714 
715       // We promote all non-typed operations to v4i32.
716       setOperationAction(ISD::AND   , VT, Promote);
717       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
718       setOperationAction(ISD::OR    , VT, Promote);
719       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
720       setOperationAction(ISD::XOR   , VT, Promote);
721       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
722       setOperationAction(ISD::LOAD  , VT, Promote);
723       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
724       setOperationAction(ISD::SELECT, VT, Promote);
725       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
726       setOperationAction(ISD::VSELECT, VT, Legal);
727       setOperationAction(ISD::SELECT_CC, VT, Promote);
728       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
729       setOperationAction(ISD::STORE, VT, Promote);
730       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
731 
732       // No other operations are legal.
733       setOperationAction(ISD::MUL , VT, Expand);
734       setOperationAction(ISD::SDIV, VT, Expand);
735       setOperationAction(ISD::SREM, VT, Expand);
736       setOperationAction(ISD::UDIV, VT, Expand);
737       setOperationAction(ISD::UREM, VT, Expand);
738       setOperationAction(ISD::FDIV, VT, Expand);
739       setOperationAction(ISD::FREM, VT, Expand);
740       setOperationAction(ISD::FNEG, VT, Expand);
741       setOperationAction(ISD::FSQRT, VT, Expand);
742       setOperationAction(ISD::FLOG, VT, Expand);
743       setOperationAction(ISD::FLOG10, VT, Expand);
744       setOperationAction(ISD::FLOG2, VT, Expand);
745       setOperationAction(ISD::FEXP, VT, Expand);
746       setOperationAction(ISD::FEXP2, VT, Expand);
747       setOperationAction(ISD::FSIN, VT, Expand);
748       setOperationAction(ISD::FCOS, VT, Expand);
749       setOperationAction(ISD::FABS, VT, Expand);
750       setOperationAction(ISD::FFLOOR, VT, Expand);
751       setOperationAction(ISD::FCEIL,  VT, Expand);
752       setOperationAction(ISD::FTRUNC, VT, Expand);
753       setOperationAction(ISD::FRINT,  VT, Expand);
754       setOperationAction(ISD::FNEARBYINT, VT, Expand);
755       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
756       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
757       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
758       setOperationAction(ISD::MULHU, VT, Expand);
759       setOperationAction(ISD::MULHS, VT, Expand);
760       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
761       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
762       setOperationAction(ISD::UDIVREM, VT, Expand);
763       setOperationAction(ISD::SDIVREM, VT, Expand);
764       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
765       setOperationAction(ISD::FPOW, VT, Expand);
766       setOperationAction(ISD::BSWAP, VT, Expand);
767       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
768       setOperationAction(ISD::ROTL, VT, Expand);
769       setOperationAction(ISD::ROTR, VT, Expand);
770 
771       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
772         setTruncStoreAction(VT, InnerVT, Expand);
773         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
774         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
775         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
776       }
777     }
778     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
779     if (!Subtarget.hasP8Vector()) {
780       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
781       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
782       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
783       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
784     }
785 
786     for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
787       setOperationAction(ISD::ABS, VT, Custom);
788 
789     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
790     // with merges, splats, etc.
791     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
792 
793     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
794     // are cheap, so handle them before they get expanded to scalar.
795     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
796     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
797     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
798     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
799     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
800 
801     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
802     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
803     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
804     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
805     setOperationAction(ISD::SELECT, MVT::v4i32,
806                        Subtarget.useCRBits() ? Legal : Expand);
807     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
808     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
809     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
810     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
811     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
812     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
813     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
814     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
815     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
816 
817     // Without hasP8Altivec set, v2i64 SMAX isn't available.
818     // But ABS custom lowering requires SMAX support.
819     if (!Subtarget.hasP8Altivec())
820       setOperationAction(ISD::ABS, MVT::v2i64, Expand);
821 
822     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
823     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
824     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
825     if (Subtarget.hasAltivec())
826       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
827         setOperationAction(ISD::ROTL, VT, Legal);
828     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
829     if (Subtarget.hasP8Altivec())
830       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
831 
832     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
833     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
834     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
835     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
836 
837     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
838     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
839 
840     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
841       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
842       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
843     }
844 
845     if (Subtarget.hasP8Altivec())
846       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
847     else
848       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
849 
850     if (Subtarget.isISA3_1()) {
851       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
852       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
853       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
854       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
855       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
856       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
857       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
858       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
859       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
860       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
861       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
862       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
863       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
864     }
865 
866     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
867     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
868 
869     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
870     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
871 
872     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
873     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
874     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
875     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
876 
877     // Altivec does not contain unordered floating-point compare instructions
878     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
879     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
880     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
881     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
882 
883     if (Subtarget.hasVSX()) {
884       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
885       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
886       if (Subtarget.hasP8Vector()) {
887         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
888         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
889       }
890       if (Subtarget.hasDirectMove() && isPPC64) {
891         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
892         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
893         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
894         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
895         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
896         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
897         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
898         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
899       }
900       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
901 
902       // The nearbyint variants are not allowed to raise the inexact exception
903       // so we can only code-gen them with unsafe math.
904       if (TM.Options.UnsafeFPMath) {
905         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
906         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
907       }
908 
909       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
910       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
911       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
912       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
913       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
914       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
915       setOperationAction(ISD::FROUND, MVT::f64, Legal);
916       setOperationAction(ISD::FRINT, MVT::f64, Legal);
917 
918       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
919       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
920       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
921       setOperationAction(ISD::FROUND, MVT::f32, Legal);
922       setOperationAction(ISD::FRINT, MVT::f32, Legal);
923 
924       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
925       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
926 
927       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
928       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
929 
930       // Share the Altivec comparison restrictions.
931       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
932       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
933       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
934       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
935 
936       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
937       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
938 
939       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
940 
941       if (Subtarget.hasP8Vector())
942         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
943 
944       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
945 
946       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
947       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
948       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
949 
950       if (Subtarget.hasP8Altivec()) {
951         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
952         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
953         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
954 
955         // 128 bit shifts can be accomplished via 3 instructions for SHL and
956         // SRL, but not for SRA because of the instructions available:
957         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
958         // doing
959         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
960         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
961         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
962 
963         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
964       }
965       else {
966         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
967         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
968         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
969 
970         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
971 
972         // VSX v2i64 only supports non-arithmetic operations.
973         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
974         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
975       }
976 
977       setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
978 
979       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
980       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
981       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
982       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
983 
984       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
985 
986       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
987       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
988       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
989       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
990 
991       // Custom handling for partial vectors of integers converted to
992       // floating point. We already have optimal handling for v2i32 through
993       // the DAG combine, so those aren't necessary.
994       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
995       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
996       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
997       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
998       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
999       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1000       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1001       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1002 
1003       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1004       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1005       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1006       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1007       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1008       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1009 
1010       if (Subtarget.hasDirectMove())
1011         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1012       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1013 
1014       // Handle constrained floating-point operations of vector.
1015       // The predictor is `hasVSX` because altivec instruction has
1016       // no exception but VSX vector instruction has.
1017       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1018       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1019       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1020       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1021       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1022       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1023       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1024       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1025       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
1026       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1027       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1028       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1029       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1030 
1031       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1032       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1033       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1034       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1035       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1036       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1037       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1038       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1039       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
1040       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1041       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1042       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1043       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1044 
1045       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1046     }
1047 
1048     if (Subtarget.hasP8Altivec()) {
1049       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1050       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1051     }
1052 
1053     if (Subtarget.hasP9Vector()) {
1054       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1055       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1056 
1057       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1058       // SRL, but not for SRA because of the instructions available:
1059       // VS{RL} and VS{RL}O.
1060       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1061       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1062       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1063 
1064       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1065       setOperationAction(ISD::FADD, MVT::f128, Legal);
1066       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1067       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1068       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1069       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1070       // No extending loads to f128 on PPC.
1071       for (MVT FPT : MVT::fp_valuetypes())
1072         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1073       setOperationAction(ISD::FMA, MVT::f128, Legal);
1074       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1075       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1076       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1077       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1078       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1079       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1080 
1081       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1082       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1083       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1084       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1085       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1086       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1087 
1088       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1089       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1090       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1091       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1092       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1093       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1094       // No implementation for these ops for PowerPC.
1095       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1096       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1097       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1098       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1099       setOperationAction(ISD::FREM, MVT::f128, Expand);
1100 
1101       // Handle constrained floating-point operations of fp128
1102       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1103       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1104       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1105       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1106       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1107       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1108       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1109       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1110       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1111       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1112       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1113       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1114       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1115       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1116       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1117       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1118       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1119       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1120       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1121       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1122     }
1123 
1124     if (Subtarget.hasP9Altivec()) {
1125       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1126       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1127 
1128       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1129       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1130       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1131       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1132       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1133       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1134       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1135     }
1136   }
1137 
1138   if (Subtarget.has64BitSupport())
1139     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1140 
1141   if (Subtarget.isISA3_1())
1142     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1143 
1144   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1145 
1146   if (!isPPC64) {
1147     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1148     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1149   }
1150 
1151   setBooleanContents(ZeroOrOneBooleanContent);
1152 
1153   if (Subtarget.hasAltivec()) {
1154     // Altivec instructions set fields to all zeros or all ones.
1155     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1156   }
1157 
1158   if (!isPPC64) {
1159     // These libcalls are not available in 32-bit.
1160     setLibcallName(RTLIB::SHL_I128, nullptr);
1161     setLibcallName(RTLIB::SRL_I128, nullptr);
1162     setLibcallName(RTLIB::SRA_I128, nullptr);
1163   }
1164 
1165   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1166 
1167   // We have target-specific dag combine patterns for the following nodes:
1168   setTargetDAGCombine(ISD::ADD);
1169   setTargetDAGCombine(ISD::SHL);
1170   setTargetDAGCombine(ISD::SRA);
1171   setTargetDAGCombine(ISD::SRL);
1172   setTargetDAGCombine(ISD::MUL);
1173   setTargetDAGCombine(ISD::FMA);
1174   setTargetDAGCombine(ISD::SINT_TO_FP);
1175   setTargetDAGCombine(ISD::BUILD_VECTOR);
1176   if (Subtarget.hasFPCVT())
1177     setTargetDAGCombine(ISD::UINT_TO_FP);
1178   setTargetDAGCombine(ISD::LOAD);
1179   setTargetDAGCombine(ISD::STORE);
1180   setTargetDAGCombine(ISD::BR_CC);
1181   if (Subtarget.useCRBits())
1182     setTargetDAGCombine(ISD::BRCOND);
1183   setTargetDAGCombine(ISD::BSWAP);
1184   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1185   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1186   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1187 
1188   setTargetDAGCombine(ISD::SIGN_EXTEND);
1189   setTargetDAGCombine(ISD::ZERO_EXTEND);
1190   setTargetDAGCombine(ISD::ANY_EXTEND);
1191 
1192   setTargetDAGCombine(ISD::TRUNCATE);
1193   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1194 
1195 
1196   if (Subtarget.useCRBits()) {
1197     setTargetDAGCombine(ISD::TRUNCATE);
1198     setTargetDAGCombine(ISD::SETCC);
1199     setTargetDAGCombine(ISD::SELECT_CC);
1200   }
1201 
1202   // Use reciprocal estimates.
1203   if (TM.Options.UnsafeFPMath) {
1204     setTargetDAGCombine(ISD::FDIV);
1205     setTargetDAGCombine(ISD::FSQRT);
1206   }
1207 
1208   if (Subtarget.hasP9Altivec()) {
1209     setTargetDAGCombine(ISD::ABS);
1210     setTargetDAGCombine(ISD::VSELECT);
1211   }
1212 
1213   setLibcallName(RTLIB::LOG_F128, "logf128");
1214   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1215   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1216   setLibcallName(RTLIB::EXP_F128, "expf128");
1217   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1218   setLibcallName(RTLIB::SIN_F128, "sinf128");
1219   setLibcallName(RTLIB::COS_F128, "cosf128");
1220   setLibcallName(RTLIB::POW_F128, "powf128");
1221   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1222   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1223   setLibcallName(RTLIB::POWI_F128, "__powikf2");
1224   setLibcallName(RTLIB::REM_F128, "fmodf128");
1225 
1226   // With 32 condition bits, we don't need to sink (and duplicate) compares
1227   // aggressively in CodeGenPrep.
1228   if (Subtarget.useCRBits()) {
1229     setHasMultipleConditionRegisters();
1230     setJumpIsExpensive();
1231   }
1232 
1233   setMinFunctionAlignment(Align(4));
1234 
1235   switch (Subtarget.getCPUDirective()) {
1236   default: break;
1237   case PPC::DIR_970:
1238   case PPC::DIR_A2:
1239   case PPC::DIR_E500:
1240   case PPC::DIR_E500mc:
1241   case PPC::DIR_E5500:
1242   case PPC::DIR_PWR4:
1243   case PPC::DIR_PWR5:
1244   case PPC::DIR_PWR5X:
1245   case PPC::DIR_PWR6:
1246   case PPC::DIR_PWR6X:
1247   case PPC::DIR_PWR7:
1248   case PPC::DIR_PWR8:
1249   case PPC::DIR_PWR9:
1250   case PPC::DIR_PWR10:
1251   case PPC::DIR_PWR_FUTURE:
1252     setPrefLoopAlignment(Align(16));
1253     setPrefFunctionAlignment(Align(16));
1254     break;
1255   }
1256 
1257   if (Subtarget.enableMachineScheduler())
1258     setSchedulingPreference(Sched::Source);
1259   else
1260     setSchedulingPreference(Sched::Hybrid);
1261 
1262   computeRegisterProperties(STI.getRegisterInfo());
1263 
1264   // The Freescale cores do better with aggressive inlining of memcpy and
1265   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1266   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1267       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1268     MaxStoresPerMemset = 32;
1269     MaxStoresPerMemsetOptSize = 16;
1270     MaxStoresPerMemcpy = 32;
1271     MaxStoresPerMemcpyOptSize = 8;
1272     MaxStoresPerMemmove = 32;
1273     MaxStoresPerMemmoveOptSize = 8;
1274   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1275     // The A2 also benefits from (very) aggressive inlining of memcpy and
1276     // friends. The overhead of a the function call, even when warm, can be
1277     // over one hundred cycles.
1278     MaxStoresPerMemset = 128;
1279     MaxStoresPerMemcpy = 128;
1280     MaxStoresPerMemmove = 128;
1281     MaxLoadsPerMemcmp = 128;
1282   } else {
1283     MaxLoadsPerMemcmp = 8;
1284     MaxLoadsPerMemcmpOptSize = 4;
1285   }
1286 
1287   // Let the subtarget (CPU) decide if a predictable select is more expensive
1288   // than the corresponding branch. This information is used in CGP to decide
1289   // when to convert selects into branches.
1290   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1291 }
1292 
1293 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1294 /// the desired ByVal argument alignment.
1295 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1296   if (MaxAlign == MaxMaxAlign)
1297     return;
1298   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1299     if (MaxMaxAlign >= 32 &&
1300         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1301       MaxAlign = Align(32);
1302     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1303              MaxAlign < 16)
1304       MaxAlign = Align(16);
1305   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1306     Align EltAlign;
1307     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1308     if (EltAlign > MaxAlign)
1309       MaxAlign = EltAlign;
1310   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1311     for (auto *EltTy : STy->elements()) {
1312       Align EltAlign;
1313       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1314       if (EltAlign > MaxAlign)
1315         MaxAlign = EltAlign;
1316       if (MaxAlign == MaxMaxAlign)
1317         break;
1318     }
1319   }
1320 }
1321 
1322 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1323 /// function arguments in the caller parameter area.
1324 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1325                                                   const DataLayout &DL) const {
1326   // 16byte and wider vectors are passed on 16byte boundary.
1327   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1328   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1329   if (Subtarget.hasAltivec())
1330     getMaxByValAlign(Ty, Alignment, Align(16));
1331   return Alignment.value();
1332 }
1333 
1334 bool PPCTargetLowering::useSoftFloat() const {
1335   return Subtarget.useSoftFloat();
1336 }
1337 
1338 bool PPCTargetLowering::hasSPE() const {
1339   return Subtarget.hasSPE();
1340 }
1341 
1342 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1343   return VT.isScalarInteger();
1344 }
1345 
1346 /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
1347 /// type is cheaper than a multiply followed by a shift.
1348 /// This is true for words and doublewords on 64-bit PowerPC.
1349 bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
1350   if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
1351                               isOperationLegal(ISD::MULHU, Type)))
1352     return true;
1353   return TargetLowering::isMulhCheaperThanMulShift(Type);
1354 }
1355 
1356 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1357   switch ((PPCISD::NodeType)Opcode) {
1358   case PPCISD::FIRST_NUMBER:    break;
1359   case PPCISD::FSEL:            return "PPCISD::FSEL";
1360   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1361   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1362   case PPCISD::FCFID:           return "PPCISD::FCFID";
1363   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1364   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1365   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1366   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1367   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1368   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1369   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1370   case PPCISD::FP_TO_UINT_IN_VSR:
1371                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1372   case PPCISD::FP_TO_SINT_IN_VSR:
1373                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1374   case PPCISD::FRE:             return "PPCISD::FRE";
1375   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1376   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1377   case PPCISD::VPERM:           return "PPCISD::VPERM";
1378   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1379   case PPCISD::XXSPLTI_SP_TO_DP:
1380     return "PPCISD::XXSPLTI_SP_TO_DP";
1381   case PPCISD::XXSPLTI32DX:
1382     return "PPCISD::XXSPLTI32DX";
1383   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1384   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1385   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1386   case PPCISD::CMPB:            return "PPCISD::CMPB";
1387   case PPCISD::Hi:              return "PPCISD::Hi";
1388   case PPCISD::Lo:              return "PPCISD::Lo";
1389   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1390   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1391   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1392   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1393   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1394   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1395   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1396   case PPCISD::SRL:             return "PPCISD::SRL";
1397   case PPCISD::SRA:             return "PPCISD::SRA";
1398   case PPCISD::SHL:             return "PPCISD::SHL";
1399   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1400   case PPCISD::CALL:            return "PPCISD::CALL";
1401   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1402   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1403   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1404   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1405   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1406   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1407   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1408   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1409   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1410   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1411   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1412   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1413   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1414   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1415   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1416   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1417     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1418   case PPCISD::ANDI_rec_1_EQ_BIT:
1419     return "PPCISD::ANDI_rec_1_EQ_BIT";
1420   case PPCISD::ANDI_rec_1_GT_BIT:
1421     return "PPCISD::ANDI_rec_1_GT_BIT";
1422   case PPCISD::VCMP:            return "PPCISD::VCMP";
1423   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1424   case PPCISD::LBRX:            return "PPCISD::LBRX";
1425   case PPCISD::STBRX:           return "PPCISD::STBRX";
1426   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1427   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1428   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1429   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1430   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1431   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1432   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1433   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1434   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1435   case PPCISD::ST_VSR_SCAL_INT:
1436                                 return "PPCISD::ST_VSR_SCAL_INT";
1437   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1438   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1439   case PPCISD::BDZ:             return "PPCISD::BDZ";
1440   case PPCISD::MFFS:            return "PPCISD::MFFS";
1441   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1442   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1443   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1444   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1445   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1446   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1447   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1448   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1449   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1450   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1451   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1452   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1453   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1454   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1455   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1456   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1457   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1458   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1459   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1460   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1461   case PPCISD::SC:              return "PPCISD::SC";
1462   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1463   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1464   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1465   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1466   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1467   case PPCISD::VABSD:           return "PPCISD::VABSD";
1468   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1469   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1470   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1471   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1472   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1473   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1474   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1475   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1476     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1477   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1478   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1479   case PPCISD::STRICT_FCTIDZ:
1480     return "PPCISD::STRICT_FCTIDZ";
1481   case PPCISD::STRICT_FCTIWZ:
1482     return "PPCISD::STRICT_FCTIWZ";
1483   case PPCISD::STRICT_FCTIDUZ:
1484     return "PPCISD::STRICT_FCTIDUZ";
1485   case PPCISD::STRICT_FCTIWUZ:
1486     return "PPCISD::STRICT_FCTIWUZ";
1487   }
1488   return nullptr;
1489 }
1490 
1491 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1492                                           EVT VT) const {
1493   if (!VT.isVector())
1494     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1495 
1496   return VT.changeVectorElementTypeToInteger();
1497 }
1498 
/// Allow the combiner to aggressively fuse mul/add sequences into FMAs;
/// unconditionally enabled for every floating-point type on PPC.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1503 
1504 //===----------------------------------------------------------------------===//
1505 // Node matching predicates, for use by the tblgen matching code.
1506 //===----------------------------------------------------------------------===//
1507 
1508 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1509 static bool isFloatingPointZero(SDValue Op) {
1510   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1511     return CFP->getValueAPF().isZero();
1512   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1513     // Maybe this has already been legalized into the constant pool?
1514     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1515       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1516         return CFP->getValueAPF().isZero();
1517   }
1518   return false;
1519 }
1520 
1521 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1522 /// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef shuffle-mask elements are encoded as negative values and match
  // anything; otherwise the element must equal Val exactly.
  if (Op < 0)
    return true;
  return Op == Val;
}
1526 
1527 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1528 /// VPKUHUM instruction.
1529 /// The ShuffleKind distinguishes between big-endian operations with
1530 /// two different inputs (0), either-endian operations with two identical
1531 /// inputs (1), and little-endian operations with two different inputs (2).
1532 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1533 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1534                                SelectionDAG &DAG) {
1535   bool IsLE = DAG.getDataLayout().isLittleEndian();
1536   if (ShuffleKind == 0) {
1537     if (IsLE)
1538       return false;
1539     for (unsigned i = 0; i != 16; ++i)
1540       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1541         return false;
1542   } else if (ShuffleKind == 2) {
1543     if (!IsLE)
1544       return false;
1545     for (unsigned i = 0; i != 16; ++i)
1546       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1547         return false;
1548   } else if (ShuffleKind == 1) {
1549     unsigned j = IsLE ? 0 : 1;
1550     for (unsigned i = 0; i != 8; ++i)
1551       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1552           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1553         return false;
1554   }
1555   return true;
1556 }
1557 
1558 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1559 /// VPKUWUM instruction.
1560 /// The ShuffleKind distinguishes between big-endian operations with
1561 /// two different inputs (0), either-endian operations with two identical
1562 /// inputs (1), and little-endian operations with two different inputs (2).
1563 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1564 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1565                                SelectionDAG &DAG) {
1566   bool IsLE = DAG.getDataLayout().isLittleEndian();
1567   if (ShuffleKind == 0) {
1568     if (IsLE)
1569       return false;
1570     for (unsigned i = 0; i != 16; i += 2)
1571       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1572           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1573         return false;
1574   } else if (ShuffleKind == 2) {
1575     if (!IsLE)
1576       return false;
1577     for (unsigned i = 0; i != 16; i += 2)
1578       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1579           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1580         return false;
1581   } else if (ShuffleKind == 1) {
1582     unsigned j = IsLE ? 0 : 2;
1583     for (unsigned i = 0; i != 8; i += 2)
1584       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1585           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1586           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1587           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1588         return false;
1589   }
1590   return true;
1591 }
1592 
1593 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1594 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1595 /// current subtarget.
1596 ///
1597 /// The ShuffleKind distinguishes between big-endian operations with
1598 /// two different inputs (0), either-endian operations with two identical
1599 /// inputs (1), and little-endian operations with two different inputs (2).
1600 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1601 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1602                                SelectionDAG &DAG) {
1603   const PPCSubtarget& Subtarget =
1604       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1605   if (!Subtarget.hasP8Vector())
1606     return false;
1607 
1608   bool IsLE = DAG.getDataLayout().isLittleEndian();
1609   if (ShuffleKind == 0) {
1610     if (IsLE)
1611       return false;
1612     for (unsigned i = 0; i != 16; i += 4)
1613       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1614           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1615           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1616           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1617         return false;
1618   } else if (ShuffleKind == 2) {
1619     if (!IsLE)
1620       return false;
1621     for (unsigned i = 0; i != 16; i += 4)
1622       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1623           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1624           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1625           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1626         return false;
1627   } else if (ShuffleKind == 1) {
1628     unsigned j = IsLE ? 0 : 4;
1629     for (unsigned i = 0; i != 8; i += 4)
1630       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1631           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1632           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1633           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1634           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1635           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1636           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1637           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1638         return false;
1639   }
1640   return true;
1641 }
1642 
1643 /// isVMerge - Common function, used to match vmrg* shuffles.
1644 ///
1645 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1646                      unsigned LHSStart, unsigned RHSStart) {
1647   if (N->getValueType(0) != MVT::v16i8)
1648     return false;
1649   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1650          "Unsupported merge size!");
1651 
1652   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1653     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1654       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1655                              LHSStart+j+i*UnitSize) ||
1656           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1657                              RHSStart+j+i*UnitSize))
1658         return false;
1659     }
1660   return true;
1661 }
1662 
1663 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1664 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1665 /// The ShuffleKind distinguishes between big-endian merges with two
1666 /// different inputs (0), either-endian merges with two identical inputs (1),
1667 /// and little-endian merges with two different inputs (2).  For the latter,
1668 /// the input operands are swapped (see PPCInstrAltivec.td).
1669 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1670                              unsigned ShuffleKind, SelectionDAG &DAG) {
1671   if (DAG.getDataLayout().isLittleEndian()) {
1672     if (ShuffleKind == 1) // unary
1673       return isVMerge(N, UnitSize, 0, 0);
1674     else if (ShuffleKind == 2) // swapped
1675       return isVMerge(N, UnitSize, 0, 16);
1676     else
1677       return false;
1678   } else {
1679     if (ShuffleKind == 1) // unary
1680       return isVMerge(N, UnitSize, 8, 8);
1681     else if (ShuffleKind == 0) // normal
1682       return isVMerge(N, UnitSize, 8, 24);
1683     else
1684       return false;
1685   }
1686 }
1687 
1688 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1689 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1690 /// The ShuffleKind distinguishes between big-endian merges with two
1691 /// different inputs (0), either-endian merges with two identical inputs (1),
1692 /// and little-endian merges with two different inputs (2).  For the latter,
1693 /// the input operands are swapped (see PPCInstrAltivec.td).
1694 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1695                              unsigned ShuffleKind, SelectionDAG &DAG) {
1696   if (DAG.getDataLayout().isLittleEndian()) {
1697     if (ShuffleKind == 1) // unary
1698       return isVMerge(N, UnitSize, 8, 8);
1699     else if (ShuffleKind == 2) // swapped
1700       return isVMerge(N, UnitSize, 8, 24);
1701     else
1702       return false;
1703   } else {
1704     if (ShuffleKind == 1) // unary
1705       return isVMerge(N, UnitSize, 0, 0);
1706     else if (ShuffleKind == 0) // normal
1707       return isVMerge(N, UnitSize, 0, 16);
1708     else
1709       return false;
1710   }
1711 }
1712 
1713 /**
1714  * Common function used to match vmrgew and vmrgow shuffles
1715  *
1716  * The indexOffset determines whether to look for even or odd words in
1717  * the shuffle mask. This is based on the of the endianness of the target
1718  * machine.
1719  *   - Little Endian:
1720  *     - Use offset of 0 to check for odd elements
1721  *     - Use offset of 4 to check for even elements
1722  *   - Big Endian:
1723  *     - Use offset of 0 to check for even elements
1724  *     - Use offset of 4 to check for odd elements
1725  * A detailed description of the vector element ordering for little endian and
1726  * big endian can be found at
1727  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1728  * Targeting your applications - what little endian and big endian IBM XL C/C++
1729  * compiler differences mean to you
1730  *
1731  * The mask to the shuffle vector instruction specifies the indices of the
1732  * elements from the two input vectors to place in the result. The elements are
1733  * numbered in array-access order, starting with the first vector. These vectors
1734  * are always of type v16i8, thus each vector will contain 16 elements of size
1735  * 8. More info on the shuffle vector can be found in the
1736  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1737  * Language Reference.
1738  *
1739  * The RHSStartValue indicates whether the same input vectors are used (unary)
1740  * or two different input vectors are used, based on the following:
1741  *   - If the instruction uses the same vector for both inputs, the range of the
1742  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1743  *     be 0.
1744  *   - If the instruction has two different vectors then the range of the
1745  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1746  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1747  *     to 31 specify elements in the second vector).
1748  *
1749  * \param[in] N The shuffle vector SD Node to analyze
1750  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1751  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1752  * vector to the shuffle_vector instruction
1753  * \return true iff this shuffle vector represents an even or odd word merge
1754  */
1755 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1756                      unsigned RHSStartValue) {
1757   if (N->getValueType(0) != MVT::v16i8)
1758     return false;
1759 
1760   for (unsigned i = 0; i < 2; ++i)
1761     for (unsigned j = 0; j < 4; ++j)
1762       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1763                              i*RHSStartValue+j+IndexOffset) ||
1764           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1765                              i*RHSStartValue+j+IndexOffset+8))
1766         return false;
1767   return true;
1768 }
1769 
1770 /**
1771  * Determine if the specified shuffle mask is suitable for the vmrgew or
1772  * vmrgow instructions.
1773  *
1774  * \param[in] N The shuffle vector SD Node to analyze
1775  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1776  * \param[in] ShuffleKind Identify the type of merge:
1777  *   - 0 = big-endian merge with two different inputs;
1778  *   - 1 = either-endian merge with two identical inputs;
1779  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1780  *     little-endian merges).
1781  * \param[in] DAG The current SelectionDAG
1782  * \return true iff this shuffle mask
1783  */
1784 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1785                               unsigned ShuffleKind, SelectionDAG &DAG) {
1786   if (DAG.getDataLayout().isLittleEndian()) {
1787     unsigned indexOffset = CheckEven ? 4 : 0;
1788     if (ShuffleKind == 1) // Unary
1789       return isVMerge(N, indexOffset, 0);
1790     else if (ShuffleKind == 2) // swapped
1791       return isVMerge(N, indexOffset, 16);
1792     else
1793       return false;
1794   }
1795   else {
1796     unsigned indexOffset = CheckEven ? 0 : 4;
1797     if (ShuffleKind == 1) // Unary
1798       return isVMerge(N, indexOffset, 0);
1799     else if (ShuffleKind == 0) // Normal
1800       return isVMerge(N, indexOffset, 16);
1801     else
1802       return false;
1803   }
1804   return false;
1805 }
1806 
1807 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1808 /// amount, otherwise return -1.
1809 /// The ShuffleKind distinguishes between big-endian operations with two
1810 /// different inputs (0), either-endian operations with two identical inputs
1811 /// (1), and little-endian operations with two different inputs (2).  For the
1812 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  // Note: getMaskElt(i) is known non-negative here (the scan above stopped on
  // the first defined element), so the int->unsigned conversion is safe.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  // Element i of a vsldoi result is byte ShiftAmt+i, so the first defined
  // element can never be smaller than its own position.
  if (ShiftAmt < i) return -1;

  // Normalize to the shift amount implied at position 0.
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    // With identical inputs the byte indices wrap around modulo 16.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // Little-endian lowering swaps the operands, so report the complementary
  // shift count.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
1853 
1854 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1855 /// specifies a splat of a single element that is suitable for input to
1856 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  // (Note: a negative/undef first element also fails this test, since the
  // unsigned remainder of its converted value is nonzero for EltSize > 1.)
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every subsequent element must repeat the first element byte-for-byte;
  // a fully-undef leading byte of an element is accepted as a wildcard.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
1888 
1889 /// Check that the mask is shuffling N byte elements. Within each N byte
1890 /// element of the mask, the indices could be either in increasing or
1891 /// decreasing order as long as they are consecutive.
1892 /// \param[in] N the shuffle vector SD Node to analyze
1893 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1894 /// Word/DoubleWord/QuadWord).
1895 /// \param[in] StepLen the delta indices number among the N byte element, if
1896 /// the mask is in increasing/decreasing order then it is 1/-1.
1897 /// \return true iff the mask is shuffling N byte elements.
1898 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1899                                    int StepLen) {
1900   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1901          "Unexpected element width.");
1902   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1903 
1904   unsigned NumOfElem = 16 / Width;
1905   unsigned MaskVal[16]; //  Width is never greater than 16
1906   for (unsigned i = 0; i < NumOfElem; ++i) {
1907     MaskVal[0] = N->getMaskElt(i * Width);
1908     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1909       return false;
1910     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1911       return false;
1912     }
1913 
1914     for (unsigned int j = 1; j < Width; ++j) {
1915       MaskVal[j] = N->getMaskElt(i * Width + j);
1916       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1917         return false;
1918       }
1919     }
1920   }
1921 
1922   return true;
1923 }
1924 
/// Return true if the shuffle N can be implemented as a single XXINSERTW:
/// one aligned 4-byte word taken from one input and inserted into the other.
/// On success, ShiftElts is the word rotation to apply to the source vector,
/// InsertAtByte is the byte offset of the insertion in the target, and Swap
/// reports whether the instruction's two inputs must be exchanged.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Every word of the mask must consist of 4 ascending consecutive bytes.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  // Word-rotation amounts needed to bring the inserted word into position,
  // indexed by the source word number (mod 4).
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    // With a unary shuffle, XXINSERTW always reads this fixed source word.
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
1999 
/// Return true if the shuffle N can be implemented as XXSLDWI: the result is
/// four consecutive words taken from the 8-word concatenation of the two
/// inputs.  On success, ShiftElts is the word shift count and Swap reports
/// whether the instruction's inputs must be exchanged first.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // Unary case: the words must be consecutive modulo 4 (one input only).
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }
    // The two branches above cover every M0 in [0,7], so Swap/ShiftElts are
    // always set before this return.
    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
2061 
2062 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2063   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2064 
2065   if (!isNByteElemShuffleMask(N, Width, -1))
2066     return false;
2067 
2068   for (int i = 0; i < 16; i += Width)
2069     if (N->getMaskElt(i) != i + Width - 1)
2070       return false;
2071 
2072   return true;
2073 }
2074 
/// Return true if this v16i8 shuffle byte-reverses each halfword (XXBRH).
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}
2078 
/// Return true if this v16i8 shuffle byte-reverses each word (XXBRW).
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}
2082 
/// Return true if this v16i8 shuffle byte-reverses each doubleword (XXBRD).
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}
2086 
/// Return true if this v16i8 shuffle byte-reverses the whole quadword (XXBRQ).
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
2090 
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // Convert the leading byte of each doubleword into a doubleword index
  // (0-3 across the two concatenated inputs).
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      // On LE the doublewords sit reversed in the register, so the immediate
      // is built from the complemented indices.
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Element 0 comes from the first input (index < 2): swap the operands
      // and remap the indices into the swapped operand order.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Element 0 comes from the second input (index > 1): swap the operands
      // and remap the indices into the swapped operand order.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2150 
2151 
2152 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2153 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2154 /// elements are counted from the left of the vector register).
2155 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2156                                          SelectionDAG &DAG) {
2157   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2158   assert(isSplatShuffleMask(SVOp, EltSize));
2159   if (DAG.getDataLayout().isLittleEndian())
2160     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2161   else
2162     return SVOp->getMaskElt(0) / EltSize;
2163 }
2164 
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      // Multiple is 2 or 4 here, so i&(Multiple-1) is i % Multiple: each
      // position within the logical splat element must hold the same value.
      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Grab the raw bits of the splatted constant; f32 is bitcast to its
  // integer representation.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
2268 
/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  // QPX vectors always have four elements.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  // Normalize so ShiftAmt is the mask value implied at position 0; if the
  // element at position i is smaller than i, the run can't start in range.
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive
  // (undef positions match anything).
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}
2298 
2299 //===----------------------------------------------------------------------===//
2300 //  Addressing Mode Selection
2301 //===----------------------------------------------------------------------===//
2302 
2303 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2304 /// or 64-bit immediate, and if the value can be accurately represented as a
2305 /// sign extension from a 16-bit value.  If so, this returns true and the
2306 /// immediate.
2307 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2308   if (!isa<ConstantSDNode>(N))
2309     return false;
2310 
2311   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2312   if (N->getValueType(0) == MVT::i32)
2313     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2314   else
2315     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2316 }
/// Convenience overload that unwraps the SDValue to its defining node.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
2320 
2321 
2322 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2323 /// be represented as an indexed [r+r] operation.
2324 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2325                                                SDValue &Index,
2326                                                SelectionDAG &DAG) const {
2327   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2328       UI != E; ++UI) {
2329     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2330       if (Memop->getMemoryVT() == MVT::f64) {
2331           Base = N.getOperand(0);
2332           Index = N.getOperand(1);
2333           return true;
2334       }
2335     }
2336   }
2337   return false;
2338 }
2339 
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
        return true;
    // Prefer [r+i] whenever the offset fits the 16-bit field (and any
    // alignment restriction the encoding imposes).
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i
    // A PPCISD::Lo operand is matched by the [r+lo(sym)] displacement form.
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2393 
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // A slot aligned to 4 bytes or better can always use the r+imm form.
  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
    return;

  // Record that this function may perform indexed (non-reg-imm) spills so
  // frame lowering reserves an emergency spill slot for the scavenger.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}
2432 
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      // A frame-index base may force an indexed spill later; record it.
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      // ZERO/ZERO8 encodes the literal 0 as the base register.
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // The high half is adjusted so that LIS-base + sext(disp) == Addr.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: [r+0] with the whole address as the base register.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}
2539 
2540 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2541 /// represented as an indexed [r+r] operation.
2542 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2543                                                 SDValue &Index,
2544                                                 SelectionDAG &DAG) const {
2545   // Check to see if we can easily represent this as an [r+r] address.  This
2546   // will fail if it thinks that the address is more profitably represented as
2547   // reg+imm, e.g. where imm = 0.
2548   if (SelectAddressRegReg(N, Base, Index, DAG))
2549     return true;
2550 
2551   // If the address is the result of an add, we will utilize the fact that the
2552   // address calculation includes an implicit add.  However, we can reduce
2553   // register pressure if we do not materialize a constant just for use as the
2554   // index register.  We only get rid of the add if it is not an add of a
2555   // value and a 16-bit signed constant and both have a single use.
2556   int16_t imm = 0;
2557   if (N.getOpcode() == ISD::ADD &&
2558       (!isIntS16Immediate(N.getOperand(1), imm) ||
2559        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2560     Base = N.getOperand(0);
2561     Index = N.getOperand(1);
2562     return true;
2563   }
2564 
2565   // Otherwise, do it the hard way, using R0 as the base register.
2566   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2567                          N.getValueType());
2568   Index = N;
2569   return true;
2570 }
2571 
/// Return true if \p N is a node of type \p Ty whose target flags include
/// PPCII::MO_PCREL_FLAG, i.e. a candidate for PC-relative addressing.
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
  Ty *PCRelCand = dyn_cast<Ty>(N);
  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
}
2576 
2577 /// Returns true if this address is a PC Relative address.
2578 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2579 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2580 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2581   // This is a materialize PC Relative node. Always select this as PC Relative.
2582   Base = N;
2583   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2584     return true;
2585   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2586       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2587       isValidPCRelNode<JumpTableSDNode>(N) ||
2588       isValidPCRelNode<BlockAddressSDNode>(N))
2589     return true;
2590   return false;
2591 }
2592 
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;

  // Only integer memory types are candidates, and the subtarget must provide
  // the matching direct vector-load instruction for the width.
  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  // The loaded value (result 0) must have exactly one use...
  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  // ...and every user of result 0 must be a (possibly permuted)
  // scalar_to_vector. Uses of the chain result (ResNo != 0) are ignored.
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
        UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
      return false;

  return true;
}
2636 
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Extract the pointer, memory type and alignment from the load or store.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // Require a displacement that is a multiple of 4 for the DS-form encoding.
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2720 
2721 //===----------------------------------------------------------------------===//
2722 //  LowerOperation implementation
2723 //===----------------------------------------------------------------------===//
2724 
2725 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2726 /// and LoOpFlags to the target MO flags.
2727 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2728                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2729                                const GlobalValue *GV = nullptr) {
2730   HiOpFlags = PPCII::MO_HA;
2731   LoOpFlags = PPCII::MO_LO;
2732 
2733   // Don't use the pic base if not in PIC relocation model.
2734   if (IsPIC) {
2735     HiOpFlags |= PPCII::MO_PIC_FLAG;
2736     LoOpFlags |= PPCII::MO_PIC_FLAG;
2737   }
2738 }
2739 
2740 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2741                              SelectionDAG &DAG) {
2742   SDLoc DL(HiPart);
2743   EVT PtrVT = HiPart.getValueType();
2744   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2745 
2746   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2747   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2748 
2749   // With PIC, the first instruction is actually "GR+hi(&G)".
2750   if (isPIC)
2751     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2752                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2753 
2754   // Generate non-pic code that has direct accesses to the constant pool.
2755   // The address of the global is just (hi(&g)+lo(&g)).
2756   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2757 }
2758 
/// Record on the function's PPC info that the TOC base pointer is used.
static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}
2763 
/// Convenience overload: mark the DAG's machine function as using the TOC
/// base pointer.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}
2767 
2768 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2769                                        SDValue GA) const {
2770   const bool Is64Bit = Subtarget.isPPC64();
2771   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2772   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2773                         : Subtarget.isAIXABI()
2774                               ? DAG.getRegister(PPC::R2, VT)
2775                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2776   SDValue Ops[] = { GA, Reg };
2777   return DAG.getMemIntrinsicNode(
2778       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2779       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2780       MachineMemOperand::MOLoad);
2781 }
2782 
/// Lower a ConstantPool node to the addressing sequence required by the
/// target ABI: PC-relative, TOC-indirect, PIC GOT, or hi/lo immediate pair.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      // PC-relative targets can materialize the address directly.
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC goes through the GOT-style TOC entry.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA =
        DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  // Otherwise build the address from a hi/lo immediate pair.
  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
2820 
2821 // For 64-bit PowerPC, prefer the more compact relative encodings.
2822 // This trades 32 bits per jump table entry for one or two instructions
2823 // on the jump site.
2824 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2825   if (isJumpTableRelative())
2826     return MachineJumpTableInfo::EK_LabelDifference32;
2827 
2828   return TargetLowering::getJumpTableEncoding();
2829 }
2830 
2831 bool PPCTargetLowering::isJumpTableRelative() const {
2832   if (UseAbsoluteJumpTables)
2833     return false;
2834   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2835     return true;
2836   return TargetLowering::isJumpTableRelative();
2837 }
2838 
2839 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2840                                                     SelectionDAG &DAG) const {
2841   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2842     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2843 
2844   switch (getTargetMachine().getCodeModel()) {
2845   case CodeModel::Small:
2846   case CodeModel::Medium:
2847     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2848   default:
2849     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2850                        getPointerTy(DAG.getDataLayout()));
2851   }
2852 }
2853 
2854 const MCExpr *
2855 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2856                                                 unsigned JTI,
2857                                                 MCContext &Ctx) const {
2858   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2859     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2860 
2861   switch (getTargetMachine().getCodeModel()) {
2862   case CodeModel::Small:
2863   case CodeModel::Medium:
2864     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2865   default:
2866     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2867   }
2868 }
2869 
// Lower a jump-table address: a PC-relative materialization when
// available, a TOC entry on 64-bit ELF / AIX, a GOT entry for 32-bit
// SVR4 PIC, or a hi/lo immediate pair otherwise.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC: the jump-table address is loaded from the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  // Non-PIC: build the address from a hi/lo pair.
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
2906 
2907 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2908                                              SelectionDAG &DAG) const {
2909   EVT PtrVT = Op.getValueType();
2910   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2911   const BlockAddress *BA = BASDN->getBlockAddress();
2912 
2913   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
2914   if (Subtarget.isUsingPCRelativeCalls()) {
2915     SDLoc DL(BASDN);
2916     EVT Ty = getPointerTy(DAG.getDataLayout());
2917     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
2918                                            PPCII::MO_PCREL_FLAG);
2919     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
2920     return MatAddr;
2921   }
2922 
2923   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2924   // The actual BlockAddress is stored in the TOC.
2925   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2926     setUsesTOCBasePtr(DAG);
2927     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2928     return getTOCEntry(DAG, SDLoc(BASDN), GA);
2929   }
2930 
2931   // 32-bit position-independent ELF stores the BlockAddress in the .got.
2932   if (Subtarget.is32BitELFABI() && isPositionIndependent())
2933     return getTOCEntry(
2934         DAG, SDLoc(BASDN),
2935         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
2936 
2937   unsigned MOHiFlag, MOLoFlag;
2938   bool IsPIC = isPositionIndependent();
2939   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2940   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2941   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2942   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
2943 }
2944 
2945 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2946                                               SelectionDAG &DAG) const {
2947   // FIXME: TLS addresses currently use medium model code sequences,
2948   // which is the most useful form.  Eventually support for small and
2949   // large models could be added if users need it, at the cost of
2950   // additional complexity.
2951   if (Subtarget.isUsingPCRelativeCalls() && !EnablePPCPCRelTLS)
2952     report_fatal_error("Thread local storage is not supported with pc-relative"
2953                        " addressing - please compile with -mno-pcrel");
2954   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2955   if (DAG.getTarget().useEmulatedTLS())
2956     return LowerToTLSEmulatedModel(GA, DAG);
2957 
2958   SDLoc dl(GA);
2959   const GlobalValue *GV = GA->getGlobal();
2960   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2961   bool is64bit = Subtarget.isPPC64();
2962   const Module *M = DAG.getMachineFunction().getFunction().getParent();
2963   PICLevel::Level picLevel = M->getPICLevel();
2964 
2965   const TargetMachine &TM = getTargetMachine();
2966   TLSModel::Model Model = TM.getTLSModel(GV);
2967 
2968   if (Model == TLSModel::LocalExec) {
2969     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2970                                                PPCII::MO_TPREL_HA);
2971     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2972                                                PPCII::MO_TPREL_LO);
2973     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
2974                              : DAG.getRegister(PPC::R2, MVT::i32);
2975 
2976     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2977     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2978   }
2979 
2980   if (Model == TLSModel::InitialExec) {
2981     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
2982     SDValue TGA = DAG.getTargetGlobalAddress(
2983         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
2984     SDValue TGATLS = DAG.getTargetGlobalAddress(
2985         GV, dl, PtrVT, 0,
2986         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
2987     SDValue TPOffset;
2988     if (IsPCRel) {
2989       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
2990       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
2991                              MachinePointerInfo());
2992     } else {
2993       SDValue GOTPtr;
2994       if (is64bit) {
2995         setUsesTOCBasePtr(DAG);
2996         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2997         GOTPtr =
2998             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
2999       } else {
3000         if (!TM.isPositionIndependent())
3001           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3002         else if (picLevel == PICLevel::SmallPIC)
3003           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3004         else
3005           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3006       }
3007       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3008     }
3009     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3010   }
3011 
3012   if (Model == TLSModel::GeneralDynamic) {
3013     if (Subtarget.isUsingPCRelativeCalls()) {
3014       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3015                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3016       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3017     }
3018 
3019     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3020     SDValue GOTPtr;
3021     if (is64bit) {
3022       setUsesTOCBasePtr(DAG);
3023       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3024       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3025                                    GOTReg, TGA);
3026     } else {
3027       if (picLevel == PICLevel::SmallPIC)
3028         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3029       else
3030         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3031     }
3032     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3033                        GOTPtr, TGA, TGA);
3034   }
3035 
3036   if (Model == TLSModel::LocalDynamic) {
3037     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3038     SDValue GOTPtr;
3039     if (is64bit) {
3040       setUsesTOCBasePtr(DAG);
3041       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3042       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3043                            GOTReg, TGA);
3044     } else {
3045       if (picLevel == PICLevel::SmallPIC)
3046         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3047       else
3048         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3049     }
3050     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3051                                   PtrVT, GOTPtr, TGA, TGA);
3052     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3053                                       PtrVT, TLSAddr, TGA);
3054     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3055   }
3056 
3057   llvm_unreachable("Unknown TLS model!");
3058 }
3059 
3060 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3061                                               SelectionDAG &DAG) const {
3062   EVT PtrVT = Op.getValueType();
3063   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3064   SDLoc DL(GSDN);
3065   const GlobalValue *GV = GSDN->getGlobal();
3066 
3067   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3068   // The actual address of the GlobalValue is stored in the TOC.
3069   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3070     if (Subtarget.isUsingPCRelativeCalls()) {
3071       EVT Ty = getPointerTy(DAG.getDataLayout());
3072       if (isAccessedAsGotIndirect(Op)) {
3073         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3074                                                 PPCII::MO_PCREL_FLAG |
3075                                                     PPCII::MO_GOT_FLAG);
3076         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3077         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3078                                    MachinePointerInfo());
3079         return Load;
3080       } else {
3081         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3082                                                 PPCII::MO_PCREL_FLAG);
3083         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3084       }
3085     }
3086     setUsesTOCBasePtr(DAG);
3087     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3088     return getTOCEntry(DAG, DL, GA);
3089   }
3090 
3091   unsigned MOHiFlag, MOLoFlag;
3092   bool IsPIC = isPositionIndependent();
3093   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3094 
3095   if (IsPIC && Subtarget.isSVR4ABI()) {
3096     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3097                                             GSDN->getOffset(),
3098                                             PPCII::MO_PIC_FLAG);
3099     return getTOCEntry(DAG, DL, GA);
3100   }
3101 
3102   SDValue GAHi =
3103     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3104   SDValue GALo =
3105     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3106 
3107   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3108 }
3109 
// Custom-lower SETCC: handle v2i64 comparisons (no native VSX compare),
// expose cmp-eq-zero as ctlz/srl for the combiner, and rewrite integer
// seteq/setne as a compare-with-zero of an XOR.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      // Returning SDValue() requests default expansion.
      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
3163 
// Custom-lower VAARG for the 32-bit SVR4 ABI va_list (see the struct
// layout documented in LowerVASTART): decide between the register save
// area and the overflow area, update the appropriate index byte, and
// load the argument.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index (first byte of the va_list)
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 arguments occupy an aligned GPR pair, so the index must be even.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // reg_save_area pointer lives at offset 8 in the va_list.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  // overflow_arg_area pointer lives at offset 4 in the va_list.
  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  // CC is true when the relevant index is still within the 8 saved regs.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  // Write the updated index byte back to the va_list.
  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument from whichever area was selected.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3262 
3263 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3264   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3265 
3266   // We have to copy the entire va_list struct:
3267   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3268   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3269                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3270                        false, true, false, MachinePointerInfo(),
3271                        MachinePointerInfo());
3272 }
3273 
3274 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3275                                                   SelectionDAG &DAG) const {
3276   if (Subtarget.isAIXABI())
3277     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3278 
3279   return Op.getOperand(0);
3280 }
3281 
3282 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3283                                                 SelectionDAG &DAG) const {
3284   if (Subtarget.isAIXABI())
3285     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3286 
3287   SDValue Chain = Op.getOperand(0);
3288   SDValue Trmp = Op.getOperand(1); // trampoline
3289   SDValue FPtr = Op.getOperand(2); // nested function
3290   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3291   SDLoc dl(Op);
3292 
3293   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3294   bool isPPC64 = (PtrVT == MVT::i64);
3295   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3296 
3297   TargetLowering::ArgListTy Args;
3298   TargetLowering::ArgListEntry Entry;
3299 
3300   Entry.Ty = IntPtrTy;
3301   Entry.Node = Trmp; Args.push_back(Entry);
3302 
3303   // TrampSize == (isPPC64 ? 48 : 40);
3304   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3305                                isPPC64 ? MVT::i64 : MVT::i32);
3306   Args.push_back(Entry);
3307 
3308   Entry.Node = FPtr; Args.push_back(Entry);
3309   Entry.Node = Nest; Args.push_back(Entry);
3310 
3311   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3312   TargetLowering::CallLoweringInfo CLI(DAG);
3313   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3314       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3315       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3316 
3317   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3318   return CallResult.second;
3319 }
3320 
// Lower VASTART: on 64-bit and AIX, store the varargs frame address into
// the va_list pointer; on 32-bit SVR4, initialize the four-field va_list
// struct described below.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Field offsets within the va_list struct, expressed as increments
  // from the previously-stored field.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
3404 
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.  These are the thirteen parameter-passing FPRs
/// (F1-F13) in ABI allocation order.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3410 
3411 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3412 /// the stack.
3413 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3414                                        unsigned PtrByteSize) {
3415   unsigned ArgSize = ArgVT.getStoreSize();
3416   if (Flags.isByVal())
3417     ArgSize = Flags.getByValSize();
3418 
3419   // Round up to multiples of the pointer size, except for array members,
3420   // which are always packed.
3421   if (!Flags.isInConsecutiveRegs())
3422     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3423 
3424   return ArgSize;
3425 }
3426 
3427 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3428 /// on the stack.
3429 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3430                                          ISD::ArgFlagsTy Flags,
3431                                          unsigned PtrByteSize) {
3432   Align Alignment(PtrByteSize);
3433 
3434   // Altivec parameters are padded to a 16 byte boundary.
3435   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3436       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3437       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3438       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3439     Alignment = Align(16);
3440 
3441   // ByVal parameters are aligned as requested.
3442   if (Flags.isByVal()) {
3443     auto BVAlign = Flags.getNonZeroByValAlign();
3444     if (BVAlign > PtrByteSize) {
3445       if (BVAlign.value() % PtrByteSize != 0)
3446         llvm_unreachable(
3447             "ByVal alignment is not a multiple of the pointer size");
3448 
3449       Alignment = BVAlign;
3450     }
3451   }
3452 
3453   // Array members are always packed to their original alignment.
3454   if (Flags.isInConsecutiveRegs()) {
3455     // If the array member was split into multiple registers, the first
3456     // needs to be aligned to the size of the full type.  (Except for
3457     // ppcf128, which is only aligned as its f64 components.)
3458     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3459       Alignment = Align(OrigVT.getStoreSize());
3460     else
3461       Alignment = Align(ArgVT.getStoreSize());
3462   }
3463 
3464   return Alignment;
3465 }
3466 
3467 /// CalculateStackSlotUsed - Return whether this argument will use its
3468 /// stack slot (instead of being passed in registers).  ArgOffset,
3469 /// AvailableFPRs, and AvailableVRs must hold the current argument
3470 /// position, and will be updated to account for this argument.
3471 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3472                                    unsigned PtrByteSize, unsigned LinkageSize,
3473                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3474                                    unsigned &AvailableFPRs,
3475                                    unsigned &AvailableVRs) {
3476   bool UseMemory = false;
3477 
3478   // Respect alignment of argument on the stack.
3479   Align Alignment =
3480       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3481   ArgOffset = alignTo(ArgOffset, Alignment);
3482   // If there's no space left in the argument save area, we must
3483   // use memory (this check also catches zero-sized arguments).
3484   if (ArgOffset >= LinkageSize + ParamAreaSize)
3485     UseMemory = true;
3486 
3487   // Allocate argument on the stack.
3488   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3489   if (Flags.isInConsecutiveRegsLast())
3490     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3491   // If we overran the argument save area, we must use memory
3492   // (this check catches arguments passed partially in memory)
3493   if (ArgOffset > LinkageSize + ParamAreaSize)
3494     UseMemory = true;
3495 
3496   // However, if the argument is actually passed in an FPR or a VR,
3497   // we don't use memory after all.
3498   if (!Flags.isByVal()) {
3499     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3500       if (AvailableFPRs > 0) {
3501         --AvailableFPRs;
3502         return false;
3503       }
3504     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3505         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3506         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3507         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3508       if (AvailableVRs > 0) {
3509         --AvailableVRs;
3510         return false;
3511       }
3512   }
3513 
3514   return UseMemory;
3515 }
3516 
3517 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3518 /// ensure minimum alignment required for target.
3519 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3520                                      unsigned NumBytes) {
3521   return alignTo(NumBytes, Lowering->getStackAlign());
3522 }
3523 
3524 SDValue PPCTargetLowering::LowerFormalArguments(
3525     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3526     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3527     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3528   if (Subtarget.isAIXABI())
3529     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3530                                     InVals);
3531   if (Subtarget.is64BitELFABI())
3532     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3533                                        InVals);
3534   if (Subtarget.is32BitELFABI())
3535     return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3536                                        InVals);
3537 
3538   return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3539                                      InVals);
3540 }
3541 
/// LowerFormalArguments_32SVR4 - Lower incoming formal arguments for the
/// 32-bit SVR4 ABI: run the CC_PPC32_SVR4 calling-convention analysis to
/// assign each argument to a register or stack slot, then emit
/// CopyFromReg / load nodes for them, plus the by-val area analysis and
/// vararg register spills.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  if (useSoftFloat())
    // Record which arguments were originally ppcf128 so the CC code can
    // treat the split f64 halves correctly under soft-float.
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      // Pick the register class matching the value type and the
      // subtarget's available register files.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // SPE doubles arrive as two consecutive i32 register locations;
        // consume both and reassemble them into a single f64 value.
        assert(i + 1 < e && "No second half of double precision argument");
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        // i1 values are promoted to i32 in registers; truncate back below.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // Without hardware FP argument registers there is nothing to spill.
    if (useSoftFloat() || hasSPE())
       NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
3791 
3792 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3793 // value to MVT::i64 and then truncate to the correct register size.
3794 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3795                                              EVT ObjectVT, SelectionDAG &DAG,
3796                                              SDValue ArgVal,
3797                                              const SDLoc &dl) const {
3798   if (Flags.isSExt())
3799     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3800                          DAG.getValueType(ObjectVT));
3801   else if (Flags.isZExt())
3802     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3803                          DAG.getValueType(ObjectVT));
3804 
3805   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3806 }
3807 
/// LowerFormalArguments_64SVR4 - Lower incoming formal arguments for the
/// 64-bit SVR4 (ELFv1/ELFv2) ABI.  A first pass decides whether the caller
/// must have allocated a parameter save area; a second pass assigns each
/// argument to GPRs/FPRs/VRs or stack slots and emits the corresponding
/// CopyFromReg / load / store nodes.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    // The 'nest' parameter lives in R11 and never consumes a slot.
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Advance FuncArg to the IR argument this lowered value came from.
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Alignment =
          CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Alignment, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          // Small aggregates are right-justified within their doubleword.
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Pick the correct 32-bit half of the GPR depending on where the
          // float lives within its doubleword for this endianness.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // On big-endian systems a small object is right-justified within its
      // slot, so point the load at the object itself, not the slot start.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by PtrByteSize (8) for the next argument to
      // store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4191 
/// LowerFormalArguments_Darwin - Lower incoming formal arguments under the
/// 32/64-bit Darwin ABI: assign each argument to a GPR/FPR/VR and/or a fixed
/// stack slot, record the minimum parameter-save area the caller must
/// reserve, and spill the remaining integer registers for varargs.
/// Results (one SDValue per entry of Ins) are appended to InVals; the
/// possibly-updated Chain is returned.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  // Arguments start immediately after the linkage area.
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0; // vectors placed after all non-vectors
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the IR argument this lowered piece
      // originates from (several Ins entries can map to one IR argument).
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        // Small byvals arrive (right justified) in a GPR; spill with a
        // truncating store of the matching width.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the object already lives in memory.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        // NOTE(review): FPR[] is a file-scope FP register table not visible
        // in this chunk — presumably F1..F13; confirm against its definition.
        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Vararg vectors also occupy (16-byte aligned) GPR-shadowed
          // stack space; skip GPRs accordingly.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // Small objects are right justified within their slot, hence the
      // (ArgSize - ObjSize) adjustment.
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4551 
4552 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4553 /// adjusted to accommodate the arguments for the tailcall.
4554 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4555                                    unsigned ParamSize) {
4556 
4557   if (!isTailCall) return 0;
4558 
4559   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4560   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4561   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4562   // Remember only if the new adjustment is bigger.
4563   if (SPDiff < FI->getTailCallSPDelta())
4564     FI->setTailCallSPDelta(SPDiff);
4565 
4566   return SPDiff;
4567 }
4568 
4569 static bool isFunctionGlobalAddress(SDValue Callee);
4570 
4571 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4572                               const TargetMachine &TM) {
4573   // It does not make sense to call callsShareTOCBase() with a caller that
4574   // is PC Relative since PC Relative callers do not have a TOC.
4575 #ifndef NDEBUG
4576   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4577   assert(!STICaller->isUsingPCRelativeCalls() &&
4578          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4579 #endif
4580 
4581   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4582   // don't have enough information to determine if the caller and callee share
4583   // the same  TOC base, so we have to pessimistically assume they don't for
4584   // correctness.
4585   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4586   if (!G)
4587     return false;
4588 
4589   const GlobalValue *GV = G->getGlobal();
4590 
4591   // If the callee is preemptable, then the static linker will use a plt-stub
4592   // which saves the toc to the stack, and needs a nop after the call
4593   // instruction to convert to a toc-restore.
4594   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4595     return false;
4596 
4597   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4598   // We may need a TOC restore in the situation where the caller requires a
4599   // valid TOC but the callee is PC Relative and does not.
4600   const Function *F = dyn_cast<Function>(GV);
4601   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4602 
4603   // If we have an Alias we can try to get the function from there.
4604   if (Alias) {
4605     const GlobalObject *GlobalObj = Alias->getBaseObject();
4606     F = dyn_cast<Function>(GlobalObj);
4607   }
4608 
4609   // If we still have no valid function pointer we do not have enough
4610   // information to determine if the callee uses PC Relative calls so we must
4611   // assume that it does.
4612   if (!F)
4613     return false;
4614 
4615   // If the callee uses PC Relative we cannot guarantee that the callee won't
4616   // clobber the TOC of the caller and so we must assume that the two
4617   // functions do not share a TOC base.
4618   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4619   if (STICallee->isUsingPCRelativeCalls())
4620     return false;
4621 
4622   // The medium and large code models are expected to provide a sufficiently
4623   // large TOC to provide all data addressing needs of a module with a
4624   // single TOC.
4625   if (CodeModel::Medium == TM.getCodeModel() ||
4626       CodeModel::Large == TM.getCodeModel())
4627     return true;
4628 
4629   // Otherwise we need to ensure callee and caller are in the same section,
4630   // since the linker may allocate multiple TOCs, and we don't know which
4631   // sections will belong to the same TOC base.
4632   if (!GV->isStrongDefinitionForLinker())
4633     return false;
4634 
4635   // Any explicitly-specified sections and section prefixes must also match.
4636   // Also, if we're using -ffunction-sections, then each function is always in
4637   // a different section (the same is true for COMDAT functions).
4638   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4639       GV->getSection() != Caller->getSection())
4640     return false;
4641   if (const auto *F = dyn_cast<Function>(GV)) {
4642     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4643       return false;
4644   }
4645 
4646   return true;
4647 }
4648 
4649 static bool
4650 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4651                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4652   assert(Subtarget.is64BitELFABI());
4653 
4654   const unsigned PtrByteSize = 8;
4655   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4656 
4657   static const MCPhysReg GPR[] = {
4658     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4659     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4660   };
4661   static const MCPhysReg VR[] = {
4662     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4663     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4664   };
4665 
4666   const unsigned NumGPRs = array_lengthof(GPR);
4667   const unsigned NumFPRs = 13;
4668   const unsigned NumVRs = array_lengthof(VR);
4669   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4670 
4671   unsigned NumBytes = LinkageSize;
4672   unsigned AvailableFPRs = NumFPRs;
4673   unsigned AvailableVRs = NumVRs;
4674 
4675   for (const ISD::OutputArg& Param : Outs) {
4676     if (Param.Flags.isNest()) continue;
4677 
4678     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4679                                LinkageSize, ParamAreaSize, NumBytes,
4680                                AvailableFPRs, AvailableVRs))
4681       return true;
4682   }
4683   return false;
4684 }
4685 
4686 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4687   if (CB.arg_size() != CallerFn->arg_size())
4688     return false;
4689 
4690   auto CalleeArgIter = CB.arg_begin();
4691   auto CalleeArgEnd = CB.arg_end();
4692   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4693 
4694   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4695     const Value* CalleeArg = *CalleeArgIter;
4696     const Value* CallerArg = &(*CallerArgIter);
4697     if (CalleeArg == CallerArg)
4698       continue;
4699 
4700     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4701     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4702     //      }
4703     // 1st argument of callee is undef and has the same type as caller.
4704     if (CalleeArg->getType() == CallerArg->getType() &&
4705         isa<UndefValue>(CalleeArg))
4706       continue;
4707 
4708     return false;
4709   }
4710 
4711   return true;
4712 }
4713 
4714 // Returns true if TCO is possible between the callers and callees
4715 // calling conventions.
4716 static bool
4717 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4718                                     CallingConv::ID CalleeCC) {
4719   // Tail calls are possible with fastcc and ccc.
4720   auto isTailCallableCC  = [] (CallingConv::ID CC){
4721       return  CC == CallingConv::C || CC == CallingConv::Fast;
4722   };
4723   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4724     return false;
4725 
4726   // We can safely tail call both fastcc and ccc callees from a c calling
4727   // convention caller. If the caller is fastcc, we may have less stack space
4728   // than a non-fastcc caller with the same signature so disable tail-calls in
4729   // that case.
4730   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4731 }
4732 
/// Decide whether a call lowered under the 64-bit SVR4/ELF ABIs may be
/// emitted as a tail call (TCO, under GuaranteedTailCallOpt) or a sibling
/// call (SCO). Returns false at the first disqualifying condition; the
/// order of the checks matters (cheap rejections first, then TOC-sharing
/// checks that only apply without PC-Relative addressing, then the
/// parameter-area checks).
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
    SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  // With SCO disabled, only the GuaranteedTailCallOpt path could succeed.
  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  auto &Caller = DAG.getMachineFunction().getFunction();
  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
    return false;

  // Caller contains any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Callee contains any byval parameter is not supported, too.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (Caller.getCallingConv() != CalleeCC &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
  // callee potentially have different TOC bases then we cannot tail call since
  // we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  // We cannot guarantee this for indirect calls or calls to external functions.
  // When PC-Relative addressing is used, the concept of the TOC is no longer
  // applicable so this check is not required.
  // Check first for indirect calls.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Check if we share the TOC base.
  if (!Subtarget.isUsingPCRelativeCalls() &&
      !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If callee use the same argument list that caller is using, then we can
  // apply SCO on this case. If it is not, then we need to check if callee needs
  // stack for passing arguments.
  // PC Relative tail calls may not have a CallBase.
  // If there is no CallBase we cannot verify if we have the same argument
  // list so assume that we don't have the same argument list.
  if (CB && !hasSameArgumentList(&Caller, *CB) &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
    return false;

  return true;
}
4816 
4817 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4818 /// for tail call optimization. Targets which want to do tail call
4819 /// optimization should implement this function.
4820 bool
4821 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4822                                                      CallingConv::ID CalleeCC,
4823                                                      bool isVarArg,
4824                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4825                                                      SelectionDAG& DAG) const {
4826   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4827     return false;
4828 
4829   // Variable argument functions are not supported.
4830   if (isVarArg)
4831     return false;
4832 
4833   MachineFunction &MF = DAG.getMachineFunction();
4834   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4835   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4836     // Functions containing by val parameters are not supported.
4837     for (unsigned i = 0; i != Ins.size(); i++) {
4838        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4839        if (Flags.isByVal()) return false;
4840     }
4841 
4842     // Non-PIC/GOT tail calls are supported.
4843     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4844       return true;
4845 
4846     // At the moment we can only do local tail calls (in same module, hidden
4847     // or protected) if we are generating PIC.
4848     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4849       return G->getGlobal()->hasHiddenVisibility()
4850           || G->getGlobal()->hasProtectedVisibility();
4851   }
4852 
4853   return false;
4854 }
4855 
4856 /// isCallCompatibleAddress - Return the immediate to use if the specified
4857 /// 32-bit value is representable in the immediate field of a BxA instruction.
4858 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4859   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4860   if (!C) return nullptr;
4861 
4862   int Addr = C->getZExtValue();
4863   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4864       SignExtend32<26>(Addr) != Addr)
4865     return nullptr;  // Top 6 bits have to be sext of immediate.
4866 
4867   return DAG
4868       .getConstant(
4869           (int)C->getZExtValue() >> 2, SDLoc(Op),
4870           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4871       .getNode();
4872 }
4873 
namespace {

/// Bookkeeping for one outgoing argument of a tail call: the value to store
/// plus the fixed stack slot it must be written to, kept both as an SDValue
/// operand and as the raw frame index.
struct TailCallArgumentInfo {
  SDValue Arg;        // Argument value to be stored.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // Raw frame index (for MachinePointerInfo).

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4885 
4886 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4887 static void StoreTailCallArgumentsToStackSlot(
4888     SelectionDAG &DAG, SDValue Chain,
4889     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4890     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4891   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4892     SDValue Arg = TailCallArgs[i].Arg;
4893     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4894     int FI = TailCallArgs[i].FrameIdx;
4895     // Store relative to framepointer.
4896     MemOpChains.push_back(DAG.getStore(
4897         Chain, dl, Arg, FIN,
4898         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4899   }
4900 }
4901 
4902 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4903 /// the appropriate stack slot for the tail call optimized function call.
4904 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4905                                              SDValue OldRetAddr, SDValue OldFP,
4906                                              int SPDiff, const SDLoc &dl) {
4907   if (SPDiff) {
4908     // Calculate the new stack slot for the return address.
4909     MachineFunction &MF = DAG.getMachineFunction();
4910     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4911     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4912     bool isPPC64 = Subtarget.isPPC64();
4913     int SlotSize = isPPC64 ? 8 : 4;
4914     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4915     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4916                                                          NewRetAddrLoc, true);
4917     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4918     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4919     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4920                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4921   }
4922   return Chain;
4923 }
4924 
4925 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4926 /// the position of the argument.
4927 static void
4928 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4929                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4930                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4931   int Offset = ArgOffset + SPDiff;
4932   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4933   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4934   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4935   SDValue FIN = DAG.getFrameIndex(FI, VT);
4936   TailCallArgumentInfo Info;
4937   Info.Arg = Arg;
4938   Info.FrameIdxOp = FIN;
4939   Info.FrameIdx = FI;
4940   TailCallArguments.push_back(Info);
4941 }
4942 
4943 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4944 /// stack slot. Returns the chain as result and the loaded frame pointers in
4945 /// LROpOut/FPOpout. Used when tail calling.
4946 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4947     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4948     SDValue &FPOpOut, const SDLoc &dl) const {
4949   if (SPDiff) {
4950     // Load the LR and FP stack slot for later adjusting.
4951     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4952     LROpOut = getReturnAddrFrameIndex(DAG);
4953     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4954     Chain = SDValue(LROpOut.getNode(), 1);
4955   }
4956   return Chain;
4957 }
4958 
4959 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4960 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4961 /// specified by the specific parameter attribute. The copy will be passed as
4962 /// a byval function parameter.
4963 /// Sometimes what we are copying is the end of a larger object, the part that
4964 /// does not fit in registers.
4965 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4966                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4967                                          SelectionDAG &DAG, const SDLoc &dl) {
4968   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4969   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
4970                        Flags.getNonZeroByValAlign(), false, false, false,
4971                        MachinePointerInfo(), MachinePointerInfo());
4972 }
4973 
4974 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4975 /// tail calls.
4976 static void LowerMemOpCallTo(
4977     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4978     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4979     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4980     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4981   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4982   if (!isTailCall) {
4983     if (isVector) {
4984       SDValue StackPtr;
4985       if (isPPC64)
4986         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4987       else
4988         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4989       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4990                            DAG.getConstant(ArgOffset, dl, PtrVT));
4991     }
4992     MemOpChains.push_back(
4993         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4994     // Calculate and remember argument location.
4995   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4996                                   TailCallArguments);
4997 }
4998 
4999 static void
5000 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5001                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5002                 SDValue FPOp,
5003                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5004   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5005   // might overwrite each other in case of tail call optimization.
5006   SmallVector<SDValue, 8> MemOpChains2;
5007   // Do not flag preceding copytoreg stuff together with the following stuff.
5008   InFlag = SDValue();
5009   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5010                                     MemOpChains2, dl);
5011   if (!MemOpChains2.empty())
5012     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5013 
5014   // Store the return address to the appropriate stack slot.
5015   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5016 
5017   // Emit callseq_end just before tailcall node.
5018   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5019                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5020   InFlag = Chain.getValue(1);
5021 }
5022 
5023 // Is this global address that of a function that can be called by name? (as
5024 // opposed to something that must hold a descriptor for an indirect call).
5025 static bool isFunctionGlobalAddress(SDValue Callee) {
5026   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5027     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5028         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5029       return false;
5030 
5031     return G->getGlobal()->getValueType()->isFunctionTy();
5032   }
5033 
5034   return false;
5035 }
5036 
/// LowerCallResult - Lower the return values of a call into the appropriate
/// copies out of physical registers, appending the results to InVals and
/// returning the updated chain.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // Assign a location (register) to each returned value.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 uses its own return convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // With SPE, an f64 result comes back split across two i32 registers;
    // copy both halves (threading chain and glue through each copy) and
    // rebuild the f64 with BUILD_SPE64.
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      // On big-endian targets the first register copied holds the other
      // half, so swap the pair.
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any extension the calling convention applied to the value,
    // asserting the known bits first where applicable.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5100 
5101 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5102                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5103   // PatchPoint calls are not indirect.
5104   if (isPatchPoint)
5105     return false;
5106 
5107   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5108     return false;
5109 
5110   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5111   // becuase the immediate function pointer points to a descriptor instead of
5112   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5113   // pointer immediate points to the global entry point, while the BLA would
5114   // need to jump to the local entry point (see rL211174).
5115   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5116       isBLACompatibleAddress(Callee, DAG))
5117     return false;
5118 
5119   return true;
5120 }
5121 
5122 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5123 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5124   return Subtarget.isAIXABI() ||
5125          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5126 }
5127 
/// Select the PPCISD call node opcode matching the call's flags and the
/// caller/callee ABI requirements (tail call, indirect, TOC maintenance,
/// PC-relative addressing).
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
                              const Function &Caller,
                              const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM) {
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                               : PPCISD::BCTRL;
  }

  if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    return PPCISD::CALL_NOTOC;
  }

  // The ABIs that maintain a TOC pointer across calls need to have a nop
  // immediately following the call instruction if the caller and callee may
  // have different TOC bases. At link time if the linker determines the calls
  // may not share a TOC base, the call is redirected to a trampoline inserted
  // by the linker. The trampoline will (among other things) save the callers
  // TOC pointer at an ABI designated offset in the linkage area and the linker
  // will rewrite the nop to be a load of the TOC pointer from the linkage area
  // into gpr2.
  if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
    return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
                                                  : PPCISD::CALL_NOP;

  return PPCISD::CALL;
}
5169 
/// Rewrite the callee of a direct call into the target-specific node form the
/// selected ABI requires: a BLA-compatible absolute address, a target global
/// address (with optional PLT flag), an AIX function entry point MCSymbol, or
/// a target external symbol.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  // If the address fits a BLA immediate and the ABI permits it, use it as-is.
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // IFuncs are never treated as local here, even if DSO-local.
    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !dyn_cast_or_null<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;

  // Builds an MCSymbol node for the "." entry-point symbol of a function on
  // AIX (the callable code symbol, as opposed to the descriptor).
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

  // Callee is a known function: emit a target global address (or the AIX
  // entry-point symbol).
  if (isFunctionGlobalAddress(Callee)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  // Callee is an external symbol (e.g. a libcall).
  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
            SectionKind::getMetadata());
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5247 
5248 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5249   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5250          "Expected a CALLSEQ_STARTSDNode.");
5251 
5252   // The last operand is the chain, except when the node has glue. If the node
5253   // has glue, then the last operand is the glue, and the chain is the second
5254   // last operand.
5255   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5256   if (LastValue.getValueType() != MVT::Glue)
5257     return LastValue;
5258 
5259   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5260 }
5261 
5262 // Creates the node that moves a functions address into the count register
5263 // to prepare for an indirect call instruction.
5264 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5265                                 SDValue &Glue, SDValue &Chain,
5266                                 const SDLoc &dl) {
5267   SDValue MTCTROps[] = {Chain, Callee, Glue};
5268   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5269   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5270                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5271   // The glue is the second value produced.
5272   Glue = Chain.getValue(1);
5273 }
5274 
/// Lower an indirect call through a function descriptor (64-bit ELFv1 / AIX):
/// load the entry point, TOC anchor, and environment pointer from the
/// descriptor, copy them into their ABI-designated registers (glued
/// together), and finally move the entry point into the CTR.
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  // Descriptor contents never change when the subtarget guarantees invariant
  // function descriptors, so mark the loads accordingly.
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                      ? (MachineMemOperand::MODereferenceable |
                         MachineMemOperand::MOInvariant)
                      : MachineMemOperand::MONone;

  // Use the called operand (if available) as the base pointer info.
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5366 
/// Assemble the operand list for the call node. Operand order is a contract
/// with the call pseudo-instruction expansion: chain first, then (for direct
/// calls) the callee or (for indirect calls) the TOC-restore address /
/// environment register / CTR, then the tail-call SP delta, argument
/// registers, implicit register uses, the register mask, and optional glue.
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
       !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5449 
/// FinishCall - Emit the call node (or TC_RETURN for tail calls), close the
/// call sequence with CALLSEQ_END, and lower the returned values into InVals.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // Record that this function uses the TOC base pointer on the TOC-based ABIs.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget());

  // Direct calls get an ABI-specific callee node; indirect calls move the
  // target address into the CTR (via the descriptor sequence if required).
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    // Tail calls produce only a chain; no CALLSEQ_END or result copies.
    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
  }

  // Normal call: produces a chain and glue for the result copies.
  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         getTargetMachine().Options.GuaranteedTailCallOpt)
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             Glue, dl);
  Glue = Chain.getValue(1);

  // Copy the call results out of their physical registers.
  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5519 
5520 SDValue
5521 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5522                              SmallVectorImpl<SDValue> &InVals) const {
5523   SelectionDAG &DAG                     = CLI.DAG;
5524   SDLoc &dl                             = CLI.DL;
5525   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5526   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5527   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5528   SDValue Chain                         = CLI.Chain;
5529   SDValue Callee                        = CLI.Callee;
5530   bool &isTailCall                      = CLI.IsTailCall;
5531   CallingConv::ID CallConv              = CLI.CallConv;
5532   bool isVarArg                         = CLI.IsVarArg;
5533   bool isPatchPoint                     = CLI.IsPatchPoint;
5534   const CallBase *CB                    = CLI.CB;
5535 
5536   if (isTailCall) {
5537     if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5538       isTailCall = false;
5539     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5540       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5541           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5542     else
5543       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5544                                                      Ins, DAG);
5545     if (isTailCall) {
5546       ++NumTailCalls;
5547       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5548         ++NumSiblingCalls;
5549 
5550       // PC Relative calls no longer guarantee that the callee is a Global
5551       // Address Node. The callee could be an indirect tail call in which
5552       // case the SDValue for the callee could be a load (to load the address
5553       // of a function pointer) or it may be a register copy (to move the
5554       // address of the callee from a function parameter into a virtual
5555       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5556       assert((Subtarget.isUsingPCRelativeCalls() ||
5557               isa<GlobalAddressSDNode>(Callee)) &&
5558              "Callee should be an llvm::Function object.");
5559 
5560       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5561                         << "\nTCO callee: ");
5562       LLVM_DEBUG(Callee.dump());
5563     }
5564   }
5565 
5566   if (!isTailCall && CB && CB->isMustTailCall())
5567     report_fatal_error("failed to perform tail call elimination on a call "
5568                        "site marked musttail");
5569 
5570   // When long calls (i.e. indirect calls) are always used, calls are always
5571   // made via function pointer. If we have a function name, first translate it
5572   // into a pointer.
5573   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5574       !isTailCall)
5575     Callee = LowerGlobalAddress(Callee, DAG);
5576 
5577   CallFlags CFlags(
5578       CallConv, isTailCall, isVarArg, isPatchPoint,
5579       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5580       // hasNest
5581       Subtarget.is64BitELFABI() &&
5582           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5583       CLI.NoMerge);
5584 
5585   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5586     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5587                             InVals, CB);
5588 
5589   if (Subtarget.isSVR4ABI())
5590     return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5591                             InVals, CB);
5592 
5593   if (Subtarget.isAIXABI())
5594     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5595                          InVals, CB);
5596 
5597   return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5598                           InVals, CB);
5599 }
5600 
/// LowerCall_32SVR4 - Lower an outgoing call for the 32-bit SVR4 (ELF) ABI.
///
/// Assigns each outgoing argument to a register or a stack slot (byval
/// aggregates get a separate pass that also reserves local copy space),
/// adjusts the stack with CALLSEQ_START, emits the argument stores and
/// register copies, handles the vararg CR6 convention, and finishes via
/// FinishCall. Returns the chain produced by the completed call sequence,
/// with the call's results appended to \p InVals.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // Stack slots are word-aligned on 32-bit PPC.
  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);
  // With soft-float, a pre-analysis pass lets the CC logic see original
  // (pre-legalization) argument types such as ppc_fp128.
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      // A true result means the CC function could not handle the type.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  // The byval copy area starts right after everything CCInfo allocated.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  // Tracks whether any FP value is passed in a register; drives the
  // CR6 set/unset convention for varargs below.
  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      // Chain the memcpy onto the operand *preceding* CALLSEQ_START so the
      // copy is sequenced before the call sequence opens.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // SPE passes f64 split across two GPRs; extract both 32-bit halves
        // in memory order. Note the ++i: this consumes the next ArgLoc too.
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                             SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        // For tail calls, stores are deferred and emitted by PrepareTailCall.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  // Merge all the argument stores into a single token so the call depends on
  // all of them without imposing an artificial order among them.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // Only include the glue operand if one exists (i.e. a CopyToReg above
    // produced one); otherwise pass just the chain.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
5837 
5838 // Copy an argument into memory, being careful to do this outside the
5839 // call sequence for the call to which the argument belongs.
5840 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5841     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5842     SelectionDAG &DAG, const SDLoc &dl) const {
5843   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5844                         CallSeqStart.getNode()->getOperand(0),
5845                         Flags, DAG, dl);
5846   // The MEMCPY must go outside the CALLSEQ_START..END.
5847   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5848   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5849                                                  SDLoc(MemcpyCall));
5850   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5851                          NewCallSeqStart.getNode());
5852   return NewCallSeqStart;
5853 }
5854 
5855 SDValue PPCTargetLowering::LowerCall_64SVR4(
5856     SDValue Chain, SDValue Callee, CallFlags CFlags,
5857     const SmallVectorImpl<ISD::OutputArg> &Outs,
5858     const SmallVectorImpl<SDValue> &OutVals,
5859     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5860     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5861     const CallBase *CB) const {
5862   bool isELFv2ABI = Subtarget.isELFv2ABI();
5863   bool isLittleEndian = Subtarget.isLittleEndian();
5864   unsigned NumOps = Outs.size();
5865   bool IsSibCall = false;
5866   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5867 
5868   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5869   unsigned PtrByteSize = 8;
5870 
5871   MachineFunction &MF = DAG.getMachineFunction();
5872 
5873   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5874     IsSibCall = true;
5875 
5876   // Mark this function as potentially containing a function that contains a
5877   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5878   // and restoring the callers stack pointer in this functions epilog. This is
5879   // done because by tail calling the called function might overwrite the value
5880   // in this function's (MF) stack pointer stack slot 0(SP).
5881   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5882     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5883 
5884   assert(!(IsFastCall && CFlags.IsVarArg) &&
5885          "fastcc not supported on varargs functions");
5886 
5887   // Count how many bytes are to be pushed on the stack, including the linkage
5888   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5889   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5890   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5891   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5892   unsigned NumBytes = LinkageSize;
5893   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5894 
5895   static const MCPhysReg GPR[] = {
5896     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5897     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5898   };
5899   static const MCPhysReg VR[] = {
5900     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5901     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5902   };
5903 
5904   const unsigned NumGPRs = array_lengthof(GPR);
5905   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5906   const unsigned NumVRs  = array_lengthof(VR);
5907 
5908   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5909   // can be passed to the callee in registers.
5910   // For the fast calling convention, there is another check below.
5911   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5912   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5913   if (!HasParameterArea) {
5914     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5915     unsigned AvailableFPRs = NumFPRs;
5916     unsigned AvailableVRs = NumVRs;
5917     unsigned NumBytesTmp = NumBytes;
5918     for (unsigned i = 0; i != NumOps; ++i) {
5919       if (Outs[i].Flags.isNest()) continue;
5920       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5921                                  PtrByteSize, LinkageSize, ParamAreaSize,
5922                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
5923         HasParameterArea = true;
5924     }
5925   }
5926 
5927   // When using the fast calling convention, we don't provide backing for
5928   // arguments that will be in registers.
5929   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5930 
5931   // Avoid allocating parameter area for fastcc functions if all the arguments
5932   // can be passed in the registers.
5933   if (IsFastCall)
5934     HasParameterArea = false;
5935 
5936   // Add up all the space actually used.
5937   for (unsigned i = 0; i != NumOps; ++i) {
5938     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5939     EVT ArgVT = Outs[i].VT;
5940     EVT OrigVT = Outs[i].ArgVT;
5941 
5942     if (Flags.isNest())
5943       continue;
5944 
5945     if (IsFastCall) {
5946       if (Flags.isByVal()) {
5947         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5948         if (NumGPRsUsed > NumGPRs)
5949           HasParameterArea = true;
5950       } else {
5951         switch (ArgVT.getSimpleVT().SimpleTy) {
5952         default: llvm_unreachable("Unexpected ValueType for argument!");
5953         case MVT::i1:
5954         case MVT::i32:
5955         case MVT::i64:
5956           if (++NumGPRsUsed <= NumGPRs)
5957             continue;
5958           break;
5959         case MVT::v4i32:
5960         case MVT::v8i16:
5961         case MVT::v16i8:
5962         case MVT::v2f64:
5963         case MVT::v2i64:
5964         case MVT::v1i128:
5965         case MVT::f128:
5966           if (++NumVRsUsed <= NumVRs)
5967             continue;
5968           break;
5969         case MVT::v4f32:
5970           if (++NumVRsUsed <= NumVRs)
5971             continue;
5972           break;
5973         case MVT::f32:
5974         case MVT::f64:
5975           if (++NumFPRsUsed <= NumFPRs)
5976             continue;
5977           break;
5978         }
5979         HasParameterArea = true;
5980       }
5981     }
5982 
5983     /* Respect alignment of argument on the stack.  */
5984     auto Alignement =
5985         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5986     NumBytes = alignTo(NumBytes, Alignement);
5987 
5988     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5989     if (Flags.isInConsecutiveRegsLast())
5990       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5991   }
5992 
5993   unsigned NumBytesActuallyUsed = NumBytes;
5994 
5995   // In the old ELFv1 ABI,
5996   // the prolog code of the callee may store up to 8 GPR argument registers to
5997   // the stack, allowing va_start to index over them in memory if its varargs.
5998   // Because we cannot tell if this is needed on the caller side, we have to
5999   // conservatively assume that it is needed.  As such, make sure we have at
6000   // least enough stack space for the caller to store the 8 GPRs.
6001   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6002   // really requires memory operands, e.g. a vararg function.
6003   if (HasParameterArea)
6004     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6005   else
6006     NumBytes = LinkageSize;
6007 
6008   // Tail call needs the stack to be aligned.
6009   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6010     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6011 
6012   int SPDiff = 0;
6013 
6014   // Calculate by how many bytes the stack has to be adjusted in case of tail
6015   // call optimization.
6016   if (!IsSibCall)
6017     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6018 
6019   // To protect arguments on the stack from being clobbered in a tail call,
6020   // force all the loads to happen before doing any other lowering.
6021   if (CFlags.IsTailCall)
6022     Chain = DAG.getStackArgumentTokenFactor(Chain);
6023 
6024   // Adjust the stack pointer for the new arguments...
6025   // These operations are automatically eliminated by the prolog/epilog pass
6026   if (!IsSibCall)
6027     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6028   SDValue CallSeqStart = Chain;
6029 
6030   // Load the return address and frame pointer so it can be move somewhere else
6031   // later.
6032   SDValue LROp, FPOp;
6033   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6034 
6035   // Set up a copy of the stack pointer for use loading and storing any
6036   // arguments that may not fit in the registers available for argument
6037   // passing.
6038   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6039 
6040   // Figure out which arguments are going to go in registers, and which in
6041   // memory.  Also, if this is a vararg function, floating point operations
6042   // must be stored to our stack, and loaded into integer regs as well, if
6043   // any integer regs are available for argument passing.
6044   unsigned ArgOffset = LinkageSize;
6045 
6046   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6047   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6048 
6049   SmallVector<SDValue, 8> MemOpChains;
6050   for (unsigned i = 0; i != NumOps; ++i) {
6051     SDValue Arg = OutVals[i];
6052     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6053     EVT ArgVT = Outs[i].VT;
6054     EVT OrigVT = Outs[i].ArgVT;
6055 
6056     // PtrOff will be used to store the current argument to the stack if a
6057     // register cannot be found for it.
6058     SDValue PtrOff;
6059 
6060     // We re-align the argument offset for each argument, except when using the
6061     // fast calling convention, when we need to make sure we do that only when
6062     // we'll actually use a stack slot.
6063     auto ComputePtrOff = [&]() {
6064       /* Respect alignment of argument on the stack.  */
6065       auto Alignment =
6066           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6067       ArgOffset = alignTo(ArgOffset, Alignment);
6068 
6069       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6070 
6071       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6072     };
6073 
6074     if (!IsFastCall) {
6075       ComputePtrOff();
6076 
6077       /* Compute GPR index associated with argument offset.  */
6078       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6079       GPR_idx = std::min(GPR_idx, NumGPRs);
6080     }
6081 
6082     // Promote integers to 64-bit values.
6083     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6084       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6085       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6086       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6087     }
6088 
6089     // FIXME memcpy is used way more than necessary.  Correctness first.
6090     // Note: "by value" is code for passing a structure by value, not
6091     // basic types.
6092     if (Flags.isByVal()) {
6093       // Note: Size includes alignment padding, so
6094       //   struct x { short a; char b; }
6095       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6096       // These are the proper values we need for right-justifying the
6097       // aggregate in a parameter register.
6098       unsigned Size = Flags.getByValSize();
6099 
6100       // An empty aggregate parameter takes up no storage and no
6101       // registers.
6102       if (Size == 0)
6103         continue;
6104 
6105       if (IsFastCall)
6106         ComputePtrOff();
6107 
6108       // All aggregates smaller than 8 bytes must be passed right-justified.
6109       if (Size==1 || Size==2 || Size==4) {
6110         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6111         if (GPR_idx != NumGPRs) {
6112           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6113                                         MachinePointerInfo(), VT);
6114           MemOpChains.push_back(Load.getValue(1));
6115           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6116 
6117           ArgOffset += PtrByteSize;
6118           continue;
6119         }
6120       }
6121 
6122       if (GPR_idx == NumGPRs && Size < 8) {
6123         SDValue AddPtr = PtrOff;
6124         if (!isLittleEndian) {
6125           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6126                                           PtrOff.getValueType());
6127           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6128         }
6129         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6130                                                           CallSeqStart,
6131                                                           Flags, DAG, dl);
6132         ArgOffset += PtrByteSize;
6133         continue;
6134       }
6135       // Copy entire object into memory.  There are cases where gcc-generated
6136       // code assumes it is there, even if it could be put entirely into
6137       // registers.  (This is not what the doc says.)
6138 
6139       // FIXME: The above statement is likely due to a misunderstanding of the
6140       // documents.  All arguments must be copied into the parameter area BY
6141       // THE CALLEE in the event that the callee takes the address of any
6142       // formal argument.  That has not yet been implemented.  However, it is
6143       // reasonable to use the stack area as a staging area for the register
6144       // load.
6145 
6146       // Skip this for small aggregates, as we will use the same slot for a
6147       // right-justified copy, below.
6148       if (Size >= 8)
6149         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6150                                                           CallSeqStart,
6151                                                           Flags, DAG, dl);
6152 
6153       // When a register is available, pass a small aggregate right-justified.
6154       if (Size < 8 && GPR_idx != NumGPRs) {
6155         // The easiest way to get this right-justified in a register
6156         // is to copy the structure into the rightmost portion of a
6157         // local variable slot, then load the whole slot into the
6158         // register.
6159         // FIXME: The memcpy seems to produce pretty awful code for
6160         // small aggregates, particularly for packed ones.
6161         // FIXME: It would be preferable to use the slot in the
6162         // parameter save area instead of a new local variable.
6163         SDValue AddPtr = PtrOff;
6164         if (!isLittleEndian) {
6165           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6166           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6167         }
6168         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6169                                                           CallSeqStart,
6170                                                           Flags, DAG, dl);
6171 
6172         // Load the slot into the register.
6173         SDValue Load =
6174             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6175         MemOpChains.push_back(Load.getValue(1));
6176         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6177 
6178         // Done with this argument.
6179         ArgOffset += PtrByteSize;
6180         continue;
6181       }
6182 
6183       // For aggregates larger than PtrByteSize, copy the pieces of the
6184       // object that fit into registers from the parameter save area.
6185       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6186         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6187         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6188         if (GPR_idx != NumGPRs) {
6189           SDValue Load =
6190               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6191           MemOpChains.push_back(Load.getValue(1));
6192           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6193           ArgOffset += PtrByteSize;
6194         } else {
6195           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6196           break;
6197         }
6198       }
6199       continue;
6200     }
6201 
6202     switch (Arg.getSimpleValueType().SimpleTy) {
6203     default: llvm_unreachable("Unexpected ValueType for argument!");
6204     case MVT::i1:
6205     case MVT::i32:
6206     case MVT::i64:
6207       if (Flags.isNest()) {
6208         // The 'nest' parameter, if any, is passed in R11.
6209         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6210         break;
6211       }
6212 
6213       // These can be scalar arguments or elements of an integer array type
6214       // passed directly.  Clang may use those instead of "byval" aggregate
6215       // types to avoid forcing arguments to memory unnecessarily.
6216       if (GPR_idx != NumGPRs) {
6217         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6218       } else {
6219         if (IsFastCall)
6220           ComputePtrOff();
6221 
6222         assert(HasParameterArea &&
6223                "Parameter area must exist to pass an argument in memory.");
6224         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6225                          true, CFlags.IsTailCall, false, MemOpChains,
6226                          TailCallArguments, dl);
6227         if (IsFastCall)
6228           ArgOffset += PtrByteSize;
6229       }
6230       if (!IsFastCall)
6231         ArgOffset += PtrByteSize;
6232       break;
6233     case MVT::f32:
6234     case MVT::f64: {
6235       // These can be scalar arguments or elements of a float array type
6236       // passed directly.  The latter are used to implement ELFv2 homogenous
6237       // float aggregates.
6238 
6239       // Named arguments go into FPRs first, and once they overflow, the
6240       // remaining arguments go into GPRs and then the parameter save area.
6241       // Unnamed arguments for vararg functions always go to GPRs and
6242       // then the parameter save area.  For now, put all arguments to vararg
6243       // routines always in both locations (FPR *and* GPR or stack slot).
6244       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6245       bool NeededLoad = false;
6246 
6247       // First load the argument into the next available FPR.
6248       if (FPR_idx != NumFPRs)
6249         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6250 
6251       // Next, load the argument into GPR or stack slot if needed.
6252       if (!NeedGPROrStack)
6253         ;
6254       else if (GPR_idx != NumGPRs && !IsFastCall) {
6255         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6256         // once we support fp <-> gpr moves.
6257 
6258         // In the non-vararg case, this can only ever happen in the
6259         // presence of f32 array types, since otherwise we never run
6260         // out of FPRs before running out of GPRs.
6261         SDValue ArgVal;
6262 
6263         // Double values are always passed in a single GPR.
6264         if (Arg.getValueType() != MVT::f32) {
6265           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6266 
6267         // Non-array float values are extended and passed in a GPR.
6268         } else if (!Flags.isInConsecutiveRegs()) {
6269           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6270           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6271 
6272         // If we have an array of floats, we collect every odd element
6273         // together with its predecessor into one GPR.
6274         } else if (ArgOffset % PtrByteSize != 0) {
6275           SDValue Lo, Hi;
6276           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6277           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6278           if (!isLittleEndian)
6279             std::swap(Lo, Hi);
6280           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6281 
6282         // The final element, if even, goes into the first half of a GPR.
6283         } else if (Flags.isInConsecutiveRegsLast()) {
6284           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6285           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6286           if (!isLittleEndian)
6287             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6288                                  DAG.getConstant(32, dl, MVT::i32));
6289 
6290         // Non-final even elements are skipped; they will be handled
6291         // together the with subsequent argument on the next go-around.
6292         } else
6293           ArgVal = SDValue();
6294 
6295         if (ArgVal.getNode())
6296           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6297       } else {
6298         if (IsFastCall)
6299           ComputePtrOff();
6300 
6301         // Single-precision floating-point values are mapped to the
6302         // second (rightmost) word of the stack doubleword.
6303         if (Arg.getValueType() == MVT::f32 &&
6304             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6305           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6306           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6307         }
6308 
6309         assert(HasParameterArea &&
6310                "Parameter area must exist to pass an argument in memory.");
6311         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6312                          true, CFlags.IsTailCall, false, MemOpChains,
6313                          TailCallArguments, dl);
6314 
6315         NeededLoad = true;
6316       }
6317       // When passing an array of floats, the array occupies consecutive
6318       // space in the argument area; only round up to the next doubleword
6319       // at the end of the array.  Otherwise, each float takes 8 bytes.
6320       if (!IsFastCall || NeededLoad) {
6321         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6322                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6323         if (Flags.isInConsecutiveRegsLast())
6324           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6325       }
6326       break;
6327     }
6328     case MVT::v4f32:
6329     case MVT::v4i32:
6330     case MVT::v8i16:
6331     case MVT::v16i8:
6332     case MVT::v2f64:
6333     case MVT::v2i64:
6334     case MVT::v1i128:
6335     case MVT::f128:
6336       // These can be scalar arguments or elements of a vector array type
6337       // passed directly.  The latter are used to implement ELFv2 homogenous
6338       // vector aggregates.
6339 
6340       // For a varargs call, named arguments go into VRs or on the stack as
6341       // usual; unnamed arguments always go to the stack or the corresponding
6342       // GPRs when within range.  For now, we always put the value in both
6343       // locations (or even all three).
6344       if (CFlags.IsVarArg) {
6345         assert(HasParameterArea &&
6346                "Parameter area must exist if we have a varargs call.");
6347         // We could elide this store in the case where the object fits
6348         // entirely in R registers.  Maybe later.
6349         SDValue Store =
6350             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6351         MemOpChains.push_back(Store);
6352         if (VR_idx != NumVRs) {
6353           SDValue Load =
6354               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6355           MemOpChains.push_back(Load.getValue(1));
6356           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6357         }
6358         ArgOffset += 16;
6359         for (unsigned i=0; i<16; i+=PtrByteSize) {
6360           if (GPR_idx == NumGPRs)
6361             break;
6362           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6363                                    DAG.getConstant(i, dl, PtrVT));
6364           SDValue Load =
6365               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6366           MemOpChains.push_back(Load.getValue(1));
6367           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6368         }
6369         break;
6370       }
6371 
6372       // Non-varargs Altivec params go into VRs or on the stack.
6373       if (VR_idx != NumVRs) {
6374         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6375       } else {
6376         if (IsFastCall)
6377           ComputePtrOff();
6378 
6379         assert(HasParameterArea &&
6380                "Parameter area must exist to pass an argument in memory.");
6381         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6382                          true, CFlags.IsTailCall, true, MemOpChains,
6383                          TailCallArguments, dl);
6384         if (IsFastCall)
6385           ArgOffset += 16;
6386       }
6387 
6388       if (!IsFastCall)
6389         ArgOffset += 16;
6390       break;
6391     }
6392   }
6393 
6394   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6395          "mismatch in size of parameter area");
6396   (void)NumBytesActuallyUsed;
6397 
6398   if (!MemOpChains.empty())
6399     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6400 
6401   // Check if this is an indirect call (MTCTR/BCTRL).
6402   // See prepareDescriptorIndirectCall and buildCallOperands for more
6403   // information about calls through function pointers in the 64-bit SVR4 ABI.
6404   if (CFlags.IsIndirect) {
6405     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6406     // caller in the TOC save area.
6407     if (isTOCSaveRestoreRequired(Subtarget)) {
6408       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6409       // Load r2 into a virtual register and store it to the TOC save area.
6410       setUsesTOCBasePtr(DAG);
6411       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6412       // TOC save area offset.
6413       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6414       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6415       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6416       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6417                            MachinePointerInfo::getStack(
6418                                DAG.getMachineFunction(), TOCSaveOffset));
6419     }
6420     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6421     // This does not mean the MTCTR instruction must use R12; it's easier
6422     // to model this as an extra parameter, so do that.
6423     if (isELFv2ABI && !CFlags.IsPatchPoint)
6424       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6425   }
6426 
6427   // Build a sequence of copy-to-reg nodes chained together with token chain
6428   // and flag operands which copy the outgoing args into the appropriate regs.
6429   SDValue InFlag;
6430   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6431     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6432                              RegsToPass[i].second, InFlag);
6433     InFlag = Chain.getValue(1);
6434   }
6435 
6436   if (CFlags.IsTailCall && !IsSibCall)
6437     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6438                     TailCallArguments);
6439 
6440   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6441                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6442 }
6443 
/// LowerCall_Darwin - Lower an outgoing call for the Darwin PowerPC ABI
/// (32- and 64-bit).  Assigns each outgoing argument to GPRs/FPRs/VRs or a
/// parameter-save-area slot, emits the stores/copies, and finishes the call
/// sequence via FinishCall.
///
/// Key Darwin-ABI quirks handled below:
///  - FP args in FPRs also *consume* GPR slots (shadowing).
///  - In 32-bit non-varargs calls, Altivec params are placed after all
///    non-Altivec params, 16-byte aligned, at the end of the argument area.
///  - Varargs FP and vector args are stored to the stack AND loaded back
///    into GPRs when GPRs remain, so va_arg can find them either way.
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CFlags.CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!CFlags.IsVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    // Round the running total up to 16 bytes, then reserve 16 bytes apiece
    // for the deferred vector parameters.
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CFlags.CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (CFlags.IsTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be move somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          // Fits in a GPR: extending load of the 1/2-byte object.
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          // No GPR left: memcpy the object right-justified into its stack
          // word (offset by PtrByteSize - Size).
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the object stays in memory; account for
          // its remaining (rounded-up) size and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        // i1 is zero-extended to the pointer width before the reg copy.
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      // Integer args always consume one pointer-sized stack slot.
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (CFlags.IsVarArg) {
          // Varargs FP args are also stored to the stack so va_arg can
          // retrieve them from memory.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          // On 32-bit, an f64 occupies two GPRs; load its second word too.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      // Stack-slot accounting: 8 bytes on PPC64; on PPC32, f32 takes 4.
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (CFlags.IsVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          // Pad to 16-byte alignment, consuming shadow GPRs as we go.
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          // Reload the just-stored value as v4f32 for the VR copy; the bit
          // pattern is what matters, not the element type.
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also shadow the 16 bytes into remaining GPRs, word by word.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, CFlags.IsTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (CFlags.IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6821 
/// CC_AIX - Custom CCAssignFn implementing the AIX ABI's argument assignment
/// for both 32- and 64-bit targets.  Returns false when the argument has
/// been assigned a location (the CCAssignFn convention for "handled").
///
/// AIX specifics encoded here:
///  - Every argument reserves space in the parameter save area (PSA), even
///    when it is passed in a register.
///  - FP arguments passed in FPRs also shadow (reserve) GPRs word-by-word.
///  - byval aggregates are passed in consecutive GPRs, spilling the
///    remainder to a single MemLoc once GPRs run out.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &State) {

  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  assert((!ValVT.isInteger() ||
          (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
         "Integer argument exceeds register size: should have been legalized");

  // Unimplemented argument categories: fail loudly rather than miscompile.
  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  if (ValVT.isVector() || LocVT.isVector())
    report_fatal_error("Vector arguments are unimplemented on AIX.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  if (ArgFlags.isByVal()) {
    // Over-aligned byval aggregates are not supported yet.
    if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "register width are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                       State.getNextStackOffset(), RegVT,
                                       LocInfo));
      return false;
    }

    // Reserve the full (pointer-aligned) PSA footprint, then hand out one
    // GPR per pointer-sized chunk; the first chunk without a GPR gets a
    // single MemLoc covering the remainder.
    const unsigned StackSize = alignTo(ByValSize, PtrAlign);
    unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
    for (const unsigned E = Offset + StackSize; Offset < E;
         Offset += PtrAlign.value()) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    LLVM_FALLTHROUGH;
  case MVT::i1:
  case MVT::i32: {
    // Reserve the PSA slot unconditionally, even if a GPR is available.
    const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
    // NOTE: FPR is a register list declared elsewhere in this file.
    unsigned FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
  }
  // NOTE(review): likely unreachable — every switch case returns or calls
  // report_fatal_error (presumably noreturn); confirm before relying on it.
  return true;
}
6950 
6951 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6952                                                     bool IsPPC64) {
6953   assert((IsPPC64 || SVT != MVT::i64) &&
6954          "i64 should have been split for 32-bit codegen.");
6955 
6956   switch (SVT) {
6957   default:
6958     report_fatal_error("Unexpected value type for formal argument");
6959   case MVT::i1:
6960   case MVT::i32:
6961   case MVT::i64:
6962     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6963   case MVT::f32:
6964     return &PPC::F4RCRegClass;
6965   case MVT::f64:
6966     return &PPC::F8RCRegClass;
6967   }
6968 }
6969 
6970 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6971                                         SelectionDAG &DAG, SDValue ArgValue,
6972                                         MVT LocVT, const SDLoc &dl) {
6973   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6974   assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
6975 
6976   if (Flags.isSExt())
6977     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6978                            DAG.getValueType(ValVT));
6979   else if (Flags.isZExt())
6980     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6981                            DAG.getValueType(ValVT));
6982 
6983   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6984 }
6985 
6986 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6987   const unsigned LASize = FL->getLinkageSize();
6988 
6989   if (PPC::GPRCRegClass.contains(Reg)) {
6990     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6991            "Reg must be a valid argument register!");
6992     return LASize + 4 * (Reg - PPC::R3);
6993   }
6994 
6995   if (PPC::G8RCRegClass.contains(Reg)) {
6996     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6997            "Reg must be a valid argument register!");
6998     return LASize + 8 * (Reg - PPC::X3);
6999   }
7000 
7001   llvm_unreachable("Only general purpose registers expected.");
7002 }
7003 
7004 //   AIX ABI Stack Frame Layout:
7005 //
7006 //   Low Memory +--------------------------------------------+
7007 //   SP   +---> | Back chain                                 | ---+
7008 //        |     +--------------------------------------------+    |
7009 //        |     | Saved Condition Register                   |    |
7010 //        |     +--------------------------------------------+    |
7011 //        |     | Saved Linkage Register                     |    |
7012 //        |     +--------------------------------------------+    | Linkage Area
7013 //        |     | Reserved for compilers                     |    |
7014 //        |     +--------------------------------------------+    |
7015 //        |     | Reserved for binders                       |    |
7016 //        |     +--------------------------------------------+    |
7017 //        |     | Saved TOC pointer                          | ---+
7018 //        |     +--------------------------------------------+
7019 //        |     | Parameter save area                        |
7020 //        |     +--------------------------------------------+
7021 //        |     | Alloca space                               |
7022 //        |     +--------------------------------------------+
7023 //        |     | Local variable space                       |
7024 //        |     +--------------------------------------------+
7025 //        |     | Float/int conversion temporary             |
7026 //        |     +--------------------------------------------+
7027 //        |     | Save area for AltiVec registers            |
7028 //        |     +--------------------------------------------+
7029 //        |     | AltiVec alignment padding                  |
7030 //        |     +--------------------------------------------+
7031 //        |     | Save area for VRSAVE register              |
7032 //        |     +--------------------------------------------+
7033 //        |     | Save area for General Purpose registers    |
7034 //        |     +--------------------------------------------+
7035 //        |     | Save area for Floating Point registers     |
7036 //        |     +--------------------------------------------+
7037 //        +---- | Back chain                                 |
7038 // High Memory  +--------------------------------------------+
7039 //
7040 //  Specifications:
7041 //  AIX 7.2 Assembler Language Reference
7042 //  Subroutine linkage convention
7043 
/// Lower incoming (formal) arguments for a function using the AIX ABI.
/// Register arguments become CopyFromReg nodes, memory arguments become
/// loads from fixed stack objects in the caller's parameter save area, and
/// by-value aggregates passed in registers are spilled to their shadow slots
/// so field accesses via address work. For variadic functions, any unused
/// GPR argument registers are also spilled so va_arg can walk them in memory.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget =
      static_cast<const PPCSubtarget &>(DAG.getSubtarget());

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  SmallVector<SDValue, 8> MemOps;

  // The index is advanced inside the body because a single by-val argument
  // may consume several consecutive ArgLocs entries.
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;

    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register.  The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom())
      continue;

    // By-val argument passed entirely in the parameter save area: hand the
    // callee a frame index pointing at the caller-initialized memory.
    if (Flags.isByVal() && VA.isMemLoc()) {
      // A zero-sized by-val still reserves one pointer-sized slot.
      const unsigned Size =
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      continue;
    }

    // By-val argument passed (at least partly) in registers: spill each
    // register into the argument's shadow slot in the parameter save area.
    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      if (Flags.getNonZeroByValAlign() > PtrByteSize)
        report_fatal_error("Over aligned byvals not supported yet.");

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixedstack object is needed because accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        SDValue Store = DAG.getStore(
            CopyFrom.getValue(1), dl, CopyFrom,
            DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
            MachinePointerInfo::getFixedStack(MF, FI, Offset));

        MemOps.push_back(Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      // Consume the remaining RegLocs that belong to this same by-val.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    // Plain scalar in a register: copy it out, truncating register-width
    // integers back to the declared value type.
    EVT ValVT = VA.getValVT();
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
      if (ValVT.isScalarInteger() &&
          (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(ArgValue);
      continue;
    }
    // Plain scalar in memory: load it from its parameter-save-area slot.
    if (VA.isMemLoc()) {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
            (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const unsigned VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
7259 
7260 SDValue PPCTargetLowering::LowerCall_AIX(
7261     SDValue Chain, SDValue Callee, CallFlags CFlags,
7262     const SmallVectorImpl<ISD::OutputArg> &Outs,
7263     const SmallVectorImpl<SDValue> &OutVals,
7264     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7265     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7266     const CallBase *CB) const {
7267   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7268   // AIX ABI stack frame layout.
7269 
7270   assert((CFlags.CallConv == CallingConv::C ||
7271           CFlags.CallConv == CallingConv::Cold ||
7272           CFlags.CallConv == CallingConv::Fast) &&
7273          "Unexpected calling convention!");
7274 
7275   if (CFlags.IsPatchPoint)
7276     report_fatal_error("This call type is unimplemented on AIX.");
7277 
7278   const PPCSubtarget& Subtarget =
7279       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7280   if (Subtarget.hasAltivec())
7281     report_fatal_error("Altivec support is unimplemented on AIX.");
7282 
7283   MachineFunction &MF = DAG.getMachineFunction();
7284   SmallVector<CCValAssign, 16> ArgLocs;
7285   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7286                  *DAG.getContext());
7287 
7288   // Reserve space for the linkage save area (LSA) on the stack.
7289   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7290   //   [SP][CR][LR][2 x reserved][TOC].
7291   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7292   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7293   const bool IsPPC64 = Subtarget.isPPC64();
7294   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7295   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7296   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7297   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7298 
7299   // The prolog code of the callee may store up to 8 GPR argument registers to
7300   // the stack, allowing va_start to index over them in memory if the callee
7301   // is variadic.
7302   // Because we cannot tell if this is needed on the caller side, we have to
7303   // conservatively assume that it is needed.  As such, make sure we have at
7304   // least enough stack space for the caller to store the 8 GPRs.
7305   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7306   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7307                                      CCInfo.getNextStackOffset());
7308 
7309   // Adjust the stack pointer for the new arguments...
7310   // These operations are automatically eliminated by the prolog/epilog pass.
7311   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7312   SDValue CallSeqStart = Chain;
7313 
7314   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7315   SmallVector<SDValue, 8> MemOpChains;
7316 
7317   // Set up a copy of the stack pointer for loading and storing any
7318   // arguments that may not fit in the registers available for argument
7319   // passing.
7320   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7321                                    : DAG.getRegister(PPC::R1, MVT::i32);
7322 
7323   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7324     const unsigned ValNo = ArgLocs[I].getValNo();
7325     SDValue Arg = OutVals[ValNo];
7326     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7327 
7328     if (Flags.isByVal()) {
7329       const unsigned ByValSize = Flags.getByValSize();
7330 
7331       // Nothing to do for zero-sized ByVals on the caller side.
7332       if (!ByValSize) {
7333         ++I;
7334         continue;
7335       }
7336 
7337       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7338         return DAG.getExtLoad(
7339             ISD::ZEXTLOAD, dl, PtrVT, Chain,
7340             (LoadOffset != 0)
7341                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7342                 : Arg,
7343             MachinePointerInfo(), VT);
7344       };
7345 
7346       unsigned LoadOffset = 0;
7347 
7348       // Initialize registers, which are fully occupied by the by-val argument.
7349       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7350         SDValue Load = GetLoad(PtrVT, LoadOffset);
7351         MemOpChains.push_back(Load.getValue(1));
7352         LoadOffset += PtrByteSize;
7353         const CCValAssign &ByValVA = ArgLocs[I++];
7354         assert(ByValVA.getValNo() == ValNo &&
7355                "Unexpected location for pass-by-value argument.");
7356         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7357       }
7358 
7359       if (LoadOffset == ByValSize)
7360         continue;
7361 
7362       // There must be one more loc to handle the remainder.
7363       assert(ArgLocs[I].getValNo() == ValNo &&
7364              "Expected additional location for by-value argument.");
7365 
7366       if (ArgLocs[I].isMemLoc()) {
7367         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7368         const CCValAssign &ByValVA = ArgLocs[I++];
7369         ISD::ArgFlagsTy MemcpyFlags = Flags;
7370         // Only memcpy the bytes that don't pass in register.
7371         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7372         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7373             (LoadOffset != 0)
7374                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7375                 : Arg,
7376             DAG.getObjectPtrOffset(dl, StackPtr,
7377                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
7378             CallSeqStart, MemcpyFlags, DAG, dl);
7379         continue;
7380       }
7381 
7382       // Initialize the final register residue.
7383       // Any residue that occupies the final by-val arg register must be
7384       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7385       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7386       // 2 and 1 byte loads.
7387       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7388       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7389              "Unexpected register residue for by-value argument.");
7390       SDValue ResidueVal;
7391       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7392         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7393         const MVT VT =
7394             N == 1 ? MVT::i8
7395                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7396         SDValue Load = GetLoad(VT, LoadOffset);
7397         MemOpChains.push_back(Load.getValue(1));
7398         LoadOffset += N;
7399         Bytes += N;
7400 
7401         // By-val arguments are passed left-justfied in register.
7402         // Every load here needs to be shifted, otherwise a full register load
7403         // should have been used.
7404         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7405                "Unexpected load emitted during handling of pass-by-value "
7406                "argument.");
7407         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7408         EVT ShiftAmountTy =
7409             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7410         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7411         SDValue ShiftedLoad =
7412             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7413         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7414                                               ShiftedLoad)
7415                                 : ShiftedLoad;
7416       }
7417 
7418       const CCValAssign &ByValVA = ArgLocs[I++];
7419       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7420       continue;
7421     }
7422 
7423     CCValAssign &VA = ArgLocs[I++];
7424     const MVT LocVT = VA.getLocVT();
7425     const MVT ValVT = VA.getValVT();
7426 
7427     switch (VA.getLocInfo()) {
7428     default:
7429       report_fatal_error("Unexpected argument extension type.");
7430     case CCValAssign::Full:
7431       break;
7432     case CCValAssign::ZExt:
7433       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7434       break;
7435     case CCValAssign::SExt:
7436       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7437       break;
7438     }
7439 
7440     if (VA.isRegLoc() && !VA.needsCustom()) {
7441       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7442       continue;
7443     }
7444 
7445     if (VA.isMemLoc()) {
7446       SDValue PtrOff =
7447           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7448       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7449       MemOpChains.push_back(
7450           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7451 
7452       continue;
7453     }
7454 
7455     // Custom handling is used for GPR initializations for vararg float
7456     // arguments.
7457     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7458            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7459            "Unexpected register handling for calling convention.");
7460 
7461     SDValue ArgAsInt =
7462         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7463 
7464     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7465       // f32 in 32-bit GPR
7466       // f64 in 64-bit GPR
7467       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7468     else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
7469       // f32 in 64-bit GPR.
7470       RegsToPass.push_back(std::make_pair(
7471           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7472     else {
7473       // f64 in two 32-bit GPRs
7474       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7475       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7476              "Unexpected custom register for argument!");
7477       CCValAssign &GPR1 = VA;
7478       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7479                                      DAG.getConstant(32, dl, MVT::i8));
7480       RegsToPass.push_back(std::make_pair(
7481           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7482 
7483       if (I != E) {
7484         // If only 1 GPR was available, there will only be one custom GPR and
7485         // the argument will also pass in memory.
7486         CCValAssign &PeekArg = ArgLocs[I];
7487         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7488           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7489           CCValAssign &GPR2 = ArgLocs[I++];
7490           RegsToPass.push_back(std::make_pair(
7491               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7492         }
7493       }
7494     }
7495   }
7496 
7497   if (!MemOpChains.empty())
7498     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7499 
7500   // For indirect calls, we need to save the TOC base to the stack for
7501   // restoration after the call.
7502   if (CFlags.IsIndirect) {
7503     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7504     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7505     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7506     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7507     const unsigned TOCSaveOffset =
7508         Subtarget.getFrameLowering()->getTOCSaveOffset();
7509 
7510     setUsesTOCBasePtr(DAG);
7511     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7512     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7513     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7514     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7515     Chain = DAG.getStore(
7516         Val.getValue(1), dl, Val, AddPtr,
7517         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7518   }
7519 
7520   // Build a sequence of copy-to-reg nodes chained together with token chain
7521   // and flag operands which copy the outgoing args into the appropriate regs.
7522   SDValue InFlag;
7523   for (auto Reg : RegsToPass) {
7524     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7525     InFlag = Chain.getValue(1);
7526   }
7527 
7528   const int SPDiff = 0;
7529   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7530                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7531 }
7532 
7533 bool
7534 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7535                                   MachineFunction &MF, bool isVarArg,
7536                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7537                                   LLVMContext &Context) const {
7538   SmallVector<CCValAssign, 16> RVLocs;
7539   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7540   return CCInfo.CheckReturn(
7541       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7542                 ? RetCC_PPC_Cold
7543                 : RetCC_PPC);
7544 }
7545 
/// Lower an outgoing return: copy each return value into its assigned
/// physical register (per RetCC_PPC / RetCC_PPC_Cold) and emit a RET_FLAG
/// node. On SPE subtargets an f64 result is split into two i32 register
/// copies, consuming two consecutive RVLocs for a single OutVals entry.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx tracks the OutVals index, which can lag behind i when the SPE
  // path below consumes an extra RVLocs entry for the second half of an f64.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Extend the value to its location type if the ABI requires it.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    // Glue the copies together so they stay adjacent to the return.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
7610 
7611 SDValue
7612 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7613                                                 SelectionDAG &DAG) const {
7614   SDLoc dl(Op);
7615 
7616   // Get the correct type for integers.
7617   EVT IntVT = Op.getValueType();
7618 
7619   // Get the inputs.
7620   SDValue Chain = Op.getOperand(0);
7621   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7622   // Build a DYNAREAOFFSET node.
7623   SDValue Ops[2] = {Chain, FPSIdx};
7624   SDVTList VTs = DAG.getVTList(IntVT);
7625   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7626 }
7627 
7628 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7629                                              SelectionDAG &DAG) const {
7630   // When we pop the dynamic allocation we need to restore the SP link.
7631   SDLoc dl(Op);
7632 
7633   // Get the correct type for pointers.
7634   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7635 
7636   // Construct the stack pointer operand.
7637   bool isPPC64 = Subtarget.isPPC64();
7638   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7639   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7640 
7641   // Get the operands for the STACKRESTORE.
7642   SDValue Chain = Op.getOperand(0);
7643   SDValue SaveSP = Op.getOperand(1);
7644 
7645   // Load the old link SP.
7646   SDValue LoadLinkSP =
7647       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7648 
7649   // Restore the stack pointer.
7650   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7651 
7652   // Store the old link SP.
7653   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7654 }
7655 
7656 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7657   MachineFunction &MF = DAG.getMachineFunction();
7658   bool isPPC64 = Subtarget.isPPC64();
7659   EVT PtrVT = getPointerTy(MF.getDataLayout());
7660 
7661   // Get current frame pointer save index.  The users of this index will be
7662   // primarily DYNALLOC instructions.
7663   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7664   int RASI = FI->getReturnAddrSaveIndex();
7665 
7666   // If the frame pointer save index hasn't been defined yet.
7667   if (!RASI) {
7668     // Find out what the fix offset of the frame pointer save area.
7669     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7670     // Allocate the frame index for frame pointer save area.
7671     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7672     // Save the result.
7673     FI->setReturnAddrSaveIndex(RASI);
7674   }
7675   return DAG.getFrameIndex(RASI, PtrVT);
7676 }
7677 
7678 SDValue
7679 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7680   MachineFunction &MF = DAG.getMachineFunction();
7681   bool isPPC64 = Subtarget.isPPC64();
7682   EVT PtrVT = getPointerTy(MF.getDataLayout());
7683 
7684   // Get current frame pointer save index.  The users of this index will be
7685   // primarily DYNALLOC instructions.
7686   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7687   int FPSI = FI->getFramePointerSaveIndex();
7688 
7689   // If the frame pointer save index hasn't been defined yet.
7690   if (!FPSI) {
7691     // Find out what the fix offset of the frame pointer save area.
7692     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7693     // Allocate the frame index for frame pointer save area.
7694     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7695     // Save the result.
7696     FI->setFramePointerSaveIndex(FPSI);
7697   }
7698   return DAG.getFrameIndex(FPSI, PtrVT);
7699 }
7700 
7701 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7702                                                    SelectionDAG &DAG) const {
7703   MachineFunction &MF = DAG.getMachineFunction();
7704   // Get the inputs.
7705   SDValue Chain = Op.getOperand(0);
7706   SDValue Size  = Op.getOperand(1);
7707   SDLoc dl(Op);
7708 
7709   // Get the correct type for pointers.
7710   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7711   // Negate the size.
7712   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7713                                 DAG.getConstant(0, dl, PtrVT), Size);
7714   // Construct a node for the frame pointer save index.
7715   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7716   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7717   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7718   if (hasInlineStackProbe(MF))
7719     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7720   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7721 }
7722 
7723 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7724                                                      SelectionDAG &DAG) const {
7725   MachineFunction &MF = DAG.getMachineFunction();
7726 
7727   bool isPPC64 = Subtarget.isPPC64();
7728   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7729 
7730   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7731   return DAG.getFrameIndex(FI, PtrVT);
7732 }
7733 
7734 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7735                                                SelectionDAG &DAG) const {
7736   SDLoc DL(Op);
7737   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7738                      DAG.getVTList(MVT::i32, MVT::Other),
7739                      Op.getOperand(0), Op.getOperand(1));
7740 }
7741 
7742 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7743                                                 SelectionDAG &DAG) const {
7744   SDLoc DL(Op);
7745   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7746                      Op.getOperand(0), Op.getOperand(1));
7747 }
7748 
7749 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7750 
7751   assert(Op.getValueType() == MVT::i1 &&
7752          "Custom lowering only for i1 loads");
7753 
7754   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7755 
7756   SDLoc dl(Op);
7757   LoadSDNode *LD = cast<LoadSDNode>(Op);
7758 
7759   SDValue Chain = LD->getChain();
7760   SDValue BasePtr = LD->getBasePtr();
7761   MachineMemOperand *MMO = LD->getMemOperand();
7762 
7763   SDValue NewLD =
7764       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7765                      BasePtr, MVT::i8, MMO);
7766   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7767 
7768   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7769   return DAG.getMergeValues(Ops, dl);
7770 }
7771 
7772 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7773   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7774          "Custom lowering only for i1 stores");
7775 
7776   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7777 
7778   SDLoc dl(Op);
7779   StoreSDNode *ST = cast<StoreSDNode>(Op);
7780 
7781   SDValue Chain = ST->getChain();
7782   SDValue BasePtr = ST->getBasePtr();
7783   SDValue Value = ST->getValue();
7784   MachineMemOperand *MMO = ST->getMemOperand();
7785 
7786   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7787                       Value);
7788   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7789 }
7790 
7791 // FIXME: Remove this once the ANDI glue bug is fixed:
7792 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7793   assert(Op.getValueType() == MVT::i1 &&
7794          "Custom lowering only for i1 results");
7795 
7796   SDLoc DL(Op);
7797   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7798 }
7799 
7800 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7801                                                SelectionDAG &DAG) const {
7802 
7803   // Implements a vector truncate that fits in a vector register as a shuffle.
7804   // We want to legalize vector truncates down to where the source fits in
7805   // a vector register (and target is therefore smaller than vector register
7806   // size).  At that point legalization will try to custom lower the sub-legal
7807   // result and get here - where we can contain the truncate as a single target
7808   // operation.
7809 
7810   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7811   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7812   //
7813   // We will implement it for big-endian ordering as this (where x denotes
7814   // undefined):
7815   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7816   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7817   //
7818   // The same operation in little-endian ordering will be:
7819   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7820   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7821 
7822   assert(Op.getValueType().isVector() && "Vector type expected.");
7823 
7824   SDLoc DL(Op);
7825   SDValue N1 = Op.getOperand(0);
7826   unsigned SrcSize = N1.getValueType().getSizeInBits();
7827   assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
7828   SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7829 
7830   EVT TrgVT = Op.getValueType();
7831   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7832   EVT EltVT = TrgVT.getVectorElementType();
7833   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7834   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7835 
7836   // First list the elements we want to keep.
7837   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7838   SmallVector<int, 16> ShuffV;
7839   if (Subtarget.isLittleEndian())
7840     for (unsigned i = 0; i < TrgNumElts; ++i)
7841       ShuffV.push_back(i * SizeMult);
7842   else
7843     for (unsigned i = 1; i <= TrgNumElts; ++i)
7844       ShuffV.push_back(i * SizeMult - 1);
7845 
7846   // Populate the remaining elements with undefs.
7847   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7848     // ShuffV.push_back(i + WideNumElts);
7849     ShuffV.push_back(WideNumElts + 1);
7850 
7851   SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
7852   return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
7853 }
7854 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP, or using SPE? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
    return Op;

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Operands: (LHS, RHS) are compared with CC; (TV, FV) are the values
  // selected on true/false.
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);
  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // Equality is built from two chained fsels: select on LHS >= 0, then
      // on -LHS >= 0 (both hold only when LHS == 0).
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // LHS <= 0 is tested as -LHS >= 0.
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: compute LHS - RHS (or RHS - LHS) and fsel on the sign of
  // the difference.
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    // As above: two chained fsels test Cmp >= 0 and -Cmp >= 0.
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  // Unhandled condition code: leave the node for default expansion.
  return Op;
}
7975 
7976 static unsigned getPPCStrictOpcode(unsigned Opc) {
7977   switch (Opc) {
7978   default:
7979     llvm_unreachable("No strict version of this opcode!");
7980   case PPCISD::FCTIDZ:
7981     return PPCISD::STRICT_FCTIDZ;
7982   case PPCISD::FCTIWZ:
7983     return PPCISD::STRICT_FCTIWZ;
7984   case PPCISD::FCTIDUZ:
7985     return PPCISD::STRICT_FCTIDUZ;
7986   case PPCISD::FCTIWUZ:
7987     return PPCISD::STRICT_FCTIWUZ;
7988   }
7989 }
7990 
// Convert the FP source of Op to an integer via the appropriate PPC fcti*
// node; the raw result is produced in an f64 value.  Handles both regular
// and strict forms of FP_TO_SINT/FP_TO_UINT.
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  // For strict nodes, source is the second operand.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  assert(Src.getValueType().isFloatingPoint());
  // Widen f32 sources to f64 first (threading the chain for strict nodes).
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f64, MVT::Other},
                        {Chain, Src});
      Chain = Src.getValue(1);
    } else
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  }
  SDValue Conv;
  unsigned Opc = ISD::DELETED_NODE;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    // Without FPCVT there is no fctiwuz; fall back to the signed doubleword
    // conversion for the unsigned-word case.
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  // Strict variants carry the chain through the conversion node.
  if (IsStrict) {
    Opc = getPPCStrictOpcode(Opc);
    Conv = DAG.getNode(Opc, dl, {MVT::f64, MVT::Other}, {Chain, Src});
  } else {
    Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
  }
  return Conv;
}
8030 
// Lower FP_TO_{S,U}INT by converting in an FP register and storing the
// result to a stack slot.  Instead of emitting the final load, fill RLI with
// chain/pointer/info so callers can materialize (and possibly reuse) it.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  // An i32 slot is only usable when STFIWX can store the low word directly
  // (the FCTIDZ fallback for unsigned without FPCVT needs the full 8 bytes).
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    // STFIWX stores the integer word held in the FP register.
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  // Hand everything needed for the final load back to the caller.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8074 
8075 /// Custom lowers floating point to integer conversions to use
8076 /// the direct move instructions available in ISA 2.07 to avoid the
8077 /// need for load/store combinations.
8078 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8079                                                     SelectionDAG &DAG,
8080                                                     const SDLoc &dl) const {
8081   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8082   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8083   if (Op->isStrictFPOpcode())
8084     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8085   else
8086     return Mov;
8087 }
8088 
// Entry point for custom FP-to-int lowering: dispatches between the f128
// legal case, a manual ppcf128 expansion, direct moves, and the
// store/reload path.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  // FP to INT conversions are legal for f128.
  if (Src.getValueType() == MVT::f128)
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (Src.getValueType() == MVT::ppcf128 && !IsStrict) {
    if (Op.getValueType() == MVT::i32) {
      if (IsSigned) {
        // Split the double-double into its two f64 halves.
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode.
        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

        // Now use a smaller FP_TO_SINT.
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
      } else {
        // 2^31 as a ppcf128 constant.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
        //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
        // FIXME: generated code sucks.
        // TODO: Are there fast-math-flags to propagate to this FSUB?
        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp);
        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
                           DAG.getConstant(0x80000000, dl, MVT::i32));
        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
        return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE);
      }
    }

    // Other ppcf128 result types: let default legalization handle them.
    return SDValue();
  }

  // Prefer direct GPR moves when the subtarget supports them.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise convert through a stack slot and load the integer back.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8142 
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
//
// Returns true and fills RLI when Op's address (or a stack slot holding its
// converted value) can be reused for a load of MemVT with extension type ET.
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  // FP_TO_UINT is only reusable here when the conversion itself is valid
  // for this subtarget (FPCVT, or an i32 result).
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    // The conversion's own stack slot provides the reusable address.
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise Op must itself be a plain, matching load.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the pointer we hand back.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  // Copy over everything needed to re-emit an equivalent load.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // Remember the load's chain result so the caller can splice the new load
  // in as a peer (see spliceIntoChain).
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8193 
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  // Nothing to splice if the caller recorded no old chain result.
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  // Build the token factor with an UNDEF placeholder first: if ResChain were
  // an operand now, the RAUW below would rewrite the token factor's own
  // operand and create a cycle.
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  // Point all former users of ResChain at the token factor, then patch the
  // real operands (ResChain, NewResChain) into it.
  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
8214 
8215 /// Analyze profitability of direct move
8216 /// prefer float load to int load plus direct move
8217 /// when there is no integer use of int load
8218 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8219   SDNode *Origin = Op.getOperand(0).getNode();
8220   if (Origin->getOpcode() != ISD::LOAD)
8221     return true;
8222 
8223   // If there is no LXSIBZX/LXSIHZX, like Power8,
8224   // prefer direct move if the memory size is 1 or 2 bytes.
8225   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8226   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8227     return true;
8228 
8229   for (SDNode::use_iterator UI = Origin->use_begin(),
8230                             UE = Origin->use_end();
8231        UI != UE; ++UI) {
8232 
8233     // Only look at the users of the loaded value.
8234     if (UI.getUse().get().getResNo() != 0)
8235       continue;
8236 
8237     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8238         UI->getOpcode() != ISD::UINT_TO_FP)
8239       return true;
8240   }
8241 
8242   return false;
8243 }
8244 
8245 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8246                               const PPCSubtarget &Subtarget) {
8247   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
8248   SDLoc dl(Op);
8249   // If we have FCFIDS, then use it when converting to single-precision.
8250   // Otherwise, convert to double-precision and then round.
8251   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8252   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8253                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8254   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8255   return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8256 }
8257 
8258 /// Custom lowers integer to floating point conversions to use
8259 /// the direct move instructions available in ISA 2.07 to avoid the
8260 /// need for load/store combinations.
8261 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8262                                                     SelectionDAG &DAG,
8263                                                     const SDLoc &dl) const {
8264   assert((Op.getValueType() == MVT::f32 ||
8265           Op.getValueType() == MVT::f64) &&
8266          "Invalid floating point type as target of conversion");
8267   assert(Subtarget.hasFPCVT() &&
8268          "Int to FP conversions with direct moves require FPCVT");
8269   SDValue Src = Op.getOperand(0);
8270   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8271   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
8272   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8273   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8274   return convertIntToFP(Op, Mov, DAG, Subtarget);
8275 }
8276 
8277 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8278 
8279   EVT VecVT = Vec.getValueType();
8280   assert(VecVT.isVector() && "Expected a vector type.");
8281   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8282 
8283   EVT EltVT = VecVT.getVectorElementType();
8284   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8285   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8286 
8287   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8288   SmallVector<SDValue, 16> Ops(NumConcat);
8289   Ops[0] = Vec;
8290   SDValue UndefVec = DAG.getUNDEF(VecVT);
8291   for (unsigned i = 1; i < NumConcat; ++i)
8292     Ops[i] = UndefVec;
8293 
8294   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8295 }
8296 
// Lower a vector int-to-FP conversion whose source is narrower than a full
// vector register: widen the source, shuffle the elements into conversion
// position, extend them to the intermediate integer type, and convert.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {

  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  bool SignedConv = Opc == ISD::SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  // Widen the source to a full 128-bit register.
  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Start with a mask selecting only elements of the second shuffle operand.
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  // Scatter the source elements to the lane positions the intermediate type
  // expects; the position within each stride depends on endianness.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // For unsigned conversions the filler lanes must be zero (the bitcast
  // below relies on them); for signed ones they are don't-cares.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Op.getOperand(0).getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    // Sign-extend in place within the intermediate integer lanes.
    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8346 
// Lower SINT_TO_FP/UINT_TO_FP for scalar f32/f64 results (vector conversions
// and f128 are dispatched early). Depending on subtarget features the integer
// source reaches the FP unit either via direct moves or through a stack slot,
// and the final convert is emitted by convertIntToFP.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Src = Op.getOperand(0);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  // Custom-marked FP-vector conversions take the dedicated vector path.
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Op;

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 source is lowered to a select between 1.0 and 0.0.
  if (Src.getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      // After the +1, a top-11-bits-all-sign-copies input yields 0 or 1, so
      // SETUGT 1 is true exactly when the rounded value must be used.
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    // If the i64 value is already available in memory, reload those bytes
    // directly as an f64.
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      // The value comes from a sign-extending i32 load: re-load it with
      // LFIWAX, which sign-extends into the FP register.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      // Same as above for a zero-extending i32 load, using LFIWZX.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The value is an in-register sign/zero extension of an i32: spill the
      // narrow value to a 4-byte stack slot and reload it with
      // LFIWAX/LFIWZX, folding the extension into the load.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      // Fall back to a plain i64 -> f64 bitcast.
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget);

    // Without FPCVT the conversion only produces f64; round to f32 if needed.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // With LFIWAX/LFIWZX, load the i32 (reusing an in-memory copy when one
    // exists) with the sign/zero extension folded into the load.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Src, FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}
8571 
// Lower FLT_ROUNDS_: read the FPSCR via MFFS, spill it, reload the word that
// holds the rounding-mode bits, and remap them to the FLT_ROUNDS encoding.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): the +4 offset assumes the f64 is stored with its low word
  // at the higher address (big-endian layout) — confirm for LE subtargets.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
  Chain = CWD.getValue(1);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Adjust the i32 result to the requested return width.
  RetVal =
      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
8635 
8636 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8637   EVT VT = Op.getValueType();
8638   unsigned BitWidth = VT.getSizeInBits();
8639   SDLoc dl(Op);
8640   assert(Op.getNumOperands() == 3 &&
8641          VT == Op.getOperand(1).getValueType() &&
8642          "Unexpected SHL!");
8643 
8644   // Expand into a bunch of logical ops.  Note that these ops
8645   // depend on the PPC behavior for oversized shift amounts.
8646   SDValue Lo = Op.getOperand(0);
8647   SDValue Hi = Op.getOperand(1);
8648   SDValue Amt = Op.getOperand(2);
8649   EVT AmtVT = Amt.getValueType();
8650 
8651   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8652                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8653   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8654   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8655   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8656   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8657                              DAG.getConstant(-BitWidth, dl, AmtVT));
8658   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8659   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8660   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8661   SDValue OutOps[] = { OutLo, OutHi };
8662   return DAG.getMergeValues(OutOps, dl);
8663 }
8664 
8665 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8666   EVT VT = Op.getValueType();
8667   SDLoc dl(Op);
8668   unsigned BitWidth = VT.getSizeInBits();
8669   assert(Op.getNumOperands() == 3 &&
8670          VT == Op.getOperand(1).getValueType() &&
8671          "Unexpected SRL!");
8672 
8673   // Expand into a bunch of logical ops.  Note that these ops
8674   // depend on the PPC behavior for oversized shift amounts.
8675   SDValue Lo = Op.getOperand(0);
8676   SDValue Hi = Op.getOperand(1);
8677   SDValue Amt = Op.getOperand(2);
8678   EVT AmtVT = Amt.getValueType();
8679 
8680   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8681                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8682   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8683   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8684   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8685   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8686                              DAG.getConstant(-BitWidth, dl, AmtVT));
8687   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8688   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8689   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8690   SDValue OutOps[] = { OutLo, OutHi };
8691   return DAG.getMergeValues(OutOps, dl);
8692 }
8693 
8694 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8695   SDLoc dl(Op);
8696   EVT VT = Op.getValueType();
8697   unsigned BitWidth = VT.getSizeInBits();
8698   assert(Op.getNumOperands() == 3 &&
8699          VT == Op.getOperand(1).getValueType() &&
8700          "Unexpected SRA!");
8701 
8702   // Expand into a bunch of logical ops, followed by a select_cc.
8703   SDValue Lo = Op.getOperand(0);
8704   SDValue Hi = Op.getOperand(1);
8705   SDValue Amt = Op.getOperand(2);
8706   EVT AmtVT = Amt.getValueType();
8707 
8708   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8709                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8710   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8711   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8712   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8713   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8714                              DAG.getConstant(-BitWidth, dl, AmtVT));
8715   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8716   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8717   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8718                                   Tmp4, Tmp6, ISD::SETLE);
8719   SDValue OutOps[] = { OutLo, OutHi };
8720   return DAG.getMergeValues(OutOps, dl);
8721 }
8722 
8723 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8724                                             SelectionDAG &DAG) const {
8725   SDLoc dl(Op);
8726   EVT VT = Op.getValueType();
8727   unsigned BitWidth = VT.getSizeInBits();
8728 
8729   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8730   SDValue X = Op.getOperand(0);
8731   SDValue Y = Op.getOperand(1);
8732   SDValue Z = Op.getOperand(2);
8733   EVT AmtVT = Z.getValueType();
8734 
8735   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8736   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8737   // This is simpler than TargetLowering::expandFunnelShift because we can rely
8738   // on PowerPC shift by BW being well defined.
8739   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8740                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
8741   SDValue SubZ =
8742       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8743   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8744   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8745   return DAG.getNode(ISD::OR, dl, VT, X, Y);
8746 }
8747 
8748 //===----------------------------------------------------------------------===//
8749 // Vector related lowering.
8750 //
8751 
8752 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8753 /// element size of SplatSize. Cast the result to VT.
8754 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8755                                       SelectionDAG &DAG, const SDLoc &dl) {
8756   static const MVT VTys[] = { // canonical VT to use for each size.
8757     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8758   };
8759 
8760   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8761 
8762   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8763   if (Val == ((1LU << (SplatSize * 8)) - 1)) {
8764     SplatSize = 1;
8765     Val = 0xFF;
8766   }
8767 
8768   EVT CanonicalVT = VTys[SplatSize-1];
8769 
8770   // Build a canonical splat for this value.
8771   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8772 }
8773 
8774 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8775 /// specified intrinsic ID.
8776 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8777                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
8778   if (DestVT == MVT::Other) DestVT = Op.getValueType();
8779   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8780                      DAG.getConstant(IID, dl, MVT::i32), Op);
8781 }
8782 
8783 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8784 /// specified intrinsic ID.
8785 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8786                                 SelectionDAG &DAG, const SDLoc &dl,
8787                                 EVT DestVT = MVT::Other) {
8788   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8789   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8790                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8791 }
8792 
8793 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8794 /// specified intrinsic ID.
8795 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8796                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8797                                 EVT DestVT = MVT::Other) {
8798   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8799   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8800                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8801 }
8802 
8803 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8804 /// amount.  The result has the specified value type.
8805 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8806                            SelectionDAG &DAG, const SDLoc &dl) {
8807   // Force LHS/RHS to be the right type.
8808   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8809   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8810 
8811   int Ops[16];
8812   for (unsigned i = 0; i != 16; ++i)
8813     Ops[i] = i + Amt;
8814   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8815   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8816 }
8817 
8818 /// Do we have an efficient pattern in a .td file for this node?
8819 ///
8820 /// \param V - pointer to the BuildVectorSDNode being matched
8821 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8822 ///
8823 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8824 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8825 /// the opposite is true (expansion is beneficial) are:
8826 /// - The node builds a vector out of integers that are not 32 or 64-bits
8827 /// - The node builds a vector out of constants
8828 /// - The node is a "load-and-splat"
8829 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8830 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8831                                             bool HasDirectMove,
8832                                             bool HasP8Vector) {
8833   EVT VecVT = V->getValueType(0);
8834   bool RightType = VecVT == MVT::v2f64 ||
8835     (HasP8Vector && VecVT == MVT::v4f32) ||
8836     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8837   if (!RightType)
8838     return false;
8839 
8840   bool IsSplat = true;
8841   bool IsLoad = false;
8842   SDValue Op0 = V->getOperand(0);
8843 
8844   // This function is called in a block that confirms the node is not a constant
8845   // splat. So a constant BUILD_VECTOR here means the vector is built out of
8846   // different constants.
8847   if (V->isConstant())
8848     return false;
8849   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8850     if (V->getOperand(i).isUndef())
8851       return false;
8852     // We want to expand nodes that represent load-and-splat even if the
8853     // loaded value is a floating point truncation or conversion to int.
8854     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8855         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8856          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8857         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8858          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8859         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8860          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8861       IsLoad = true;
8862     // If the operands are different or the input is not a load and has more
8863     // uses than just this BV node, then it isn't a splat.
8864     if (V->getOperand(i) != Op0 ||
8865         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8866       IsSplat = false;
8867   }
8868   return !(IsSplat && IsLoad);
8869 }
8870 
8871 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8872 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8873 
8874   SDLoc dl(Op);
8875   SDValue Op0 = Op->getOperand(0);
8876 
8877   if ((Op.getValueType() != MVT::f128) ||
8878       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8879       (Op0.getOperand(0).getValueType() != MVT::i64) ||
8880       (Op0.getOperand(1).getValueType() != MVT::i64))
8881     return SDValue();
8882 
8883   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8884                      Op0.getOperand(1));
8885 }
8886 
8887 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
8888   const SDValue *InputLoad = &Op;
8889   if (InputLoad->getOpcode() == ISD::BITCAST)
8890     InputLoad = &InputLoad->getOperand(0);
8891   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
8892       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
8893     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
8894     InputLoad = &InputLoad->getOperand(0);
8895   }
8896   if (InputLoad->getOpcode() != ISD::LOAD)
8897     return nullptr;
8898   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8899   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
8900 }
8901 
8902 // Convert the argument APFloat to a single precision APFloat if there is no
8903 // loss in information during the conversion to single precision APFloat and the
8904 // resulting number is not a denormal number. Return true if successful.
8905 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
8906   APFloat APFloatToConvert = ArgAPFloat;
8907   bool LosesInfo = true;
8908   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
8909                            &LosesInfo);
8910   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
8911   if (Success)
8912     ArgAPFloat = APFloatToConvert;
8913   return Success;
8914 }
8915 
8916 // Bitcast the argument APInt to a double and convert it to a single precision
8917 // APFloat, bitcast the APFloat to an APInt and assign it to the original
8918 // argument if there is no loss in information during the conversion from
8919 // double to single precision APFloat and the resulting number is not a denormal
8920 // number. Return true if successful.
8921 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
8922   double DpValue = ArgAPInt.bitsToDouble();
8923   APFloat APFloatDp(DpValue);
8924   bool Success = convertToNonDenormSingle(APFloatDp);
8925   if (Success)
8926     ArgAPInt = APFloatDp.bitcastToAPInt();
8927   return Success;
8928 }
8929 
8930 // If this is a case we can't handle, return null and let the default
8931 // expansion code take care of it.  If we CAN select this case, and if it
8932 // selects to a single instruction, return Op.  Otherwise, if we can codegen
8933 // this case more efficiently than a constant pool load, lower it to the
8934 // sequence of ops that should be used.
8935 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8936                                              SelectionDAG &DAG) const {
8937   SDLoc dl(Op);
8938   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8939   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8940 
8941   // Check if this is a splat of a constant value.
8942   APInt APSplatBits, APSplatUndef;
8943   unsigned SplatBitSize;
8944   bool HasAnyUndefs;
8945   bool BVNIsConstantSplat =
8946       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8947                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
8948 
8949   // If it is a splat of a double, check if we can shrink it to a 32 bit
8950   // non-denormal float which when converted back to double gives us the same
8951   // double. This is to exploit the XXSPLTIDP instruction.
8952   if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
8953       (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
8954       convertToNonDenormSingle(APSplatBits)) {
8955     SDValue SplatNode = DAG.getNode(
8956         PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
8957         DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
8958     return DAG.getBitcast(Op.getValueType(), SplatNode);
8959   }
8960 
8961   if (!BVNIsConstantSplat || SplatBitSize > 32) {
8962 
8963     bool IsPermutedLoad = false;
8964     const SDValue *InputLoad =
8965         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
8966     // Handle load-and-splat patterns as we have instructions that will do this
8967     // in one go.
8968     if (InputLoad && DAG.isSplatValue(Op, true)) {
8969       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8970 
8971       // We have handling for 4 and 8 byte elements.
8972       unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
8973 
8974       // Checking for a single use of this load, we have to check for vector
8975       // width (128 bits) / ElementSize uses (since each operand of the
8976       // BUILD_VECTOR is a separate use of the value.
8977       if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
8978           ((Subtarget.hasVSX() && ElementSize == 64) ||
8979            (Subtarget.hasP9Vector() && ElementSize == 32))) {
8980         SDValue Ops[] = {
8981           LD->getChain(),    // Chain
8982           LD->getBasePtr(),  // Ptr
8983           DAG.getValueType(Op.getValueType()) // VT
8984         };
8985         return
8986           DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
8987                                   DAG.getVTList(Op.getValueType(), MVT::Other),
8988                                   Ops, LD->getMemoryVT(), LD->getMemOperand());
8989       }
8990     }
8991 
8992     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8993     // lowered to VSX instructions under certain conditions.
8994     // Without VSX, there is no pattern more efficient than expanding the node.
8995     if (Subtarget.hasVSX() &&
8996         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8997                                         Subtarget.hasP8Vector()))
8998       return Op;
8999     return SDValue();
9000   }
9001 
9002   uint64_t SplatBits = APSplatBits.getZExtValue();
9003   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9004   unsigned SplatSize = SplatBitSize / 8;
9005 
9006   // First, handle single instruction cases.
9007 
9008   // All zeros?
9009   if (SplatBits == 0) {
9010     // Canonicalize all zero vectors to be v4i32.
9011     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9012       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9013       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9014     }
9015     return Op;
9016   }
9017 
9018   // We have XXSPLTIW for constant splats four bytes wide.
9019   // Given vector length is a multiple of 4, 2-byte splats can be replaced
9020   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9021   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9022   // turned into a 4-byte splat of 0xABABABAB.
9023   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9024     return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
9025                                   Op.getValueType(), DAG, dl);
9026 
9027   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9028     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9029                                   dl);
9030 
9031   // We have XXSPLTIB for constant splats one byte wide.
9032   if (Subtarget.hasP9Vector() && SplatSize == 1)
9033     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9034                                   dl);
9035 
9036   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9037   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9038                     (32-SplatBitSize));
9039   if (SextVal >= -16 && SextVal <= 15)
9040     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9041                                   dl);
9042 
9043   // Two instruction sequences.
9044 
9045   // If this value is in the range [-32,30] and is even, use:
9046   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9047   // If this value is in the range [17,31] and is odd, use:
9048   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9049   // If this value is in the range [-31,-17] and is odd, use:
9050   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9051   // Note the last two are three-instruction sequences.
9052   if (SextVal >= -32 && SextVal <= 31) {
9053     // To avoid having these optimizations undone by constant folding,
9054     // we convert to a pseudo that will be expanded later into one of
9055     // the above forms.
9056     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9057     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9058               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9059     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9060     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9061     if (VT == Op.getValueType())
9062       return RetVal;
9063     else
9064       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9065   }
9066 
9067   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9068   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9069   // for fneg/fabs.
9070   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9071     // Make -1 and vspltisw -1:
9072     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9073 
9074     // Make the VSLW intrinsic, computing 0x8000_0000.
9075     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9076                                    OnesV, DAG, dl);
9077 
9078     // xor by OnesV to invert it.
9079     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9080     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9081   }
9082 
9083   // Check to see if this is a wide variety of vsplti*, binop self cases.
9084   static const signed char SplatCsts[] = {
9085     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9086     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9087   };
9088 
9089   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9090     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9091     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
9092     int i = SplatCsts[idx];
9093 
9094     // Figure out what shift amount will be used by altivec if shifted by i in
9095     // this splat size.
9096     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9097 
9098     // vsplti + shl self.
9099     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9100       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9101       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9102         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9103         Intrinsic::ppc_altivec_vslw
9104       };
9105       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9106       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9107     }
9108 
9109     // vsplti + srl self.
9110     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9111       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9112       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9113         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9114         Intrinsic::ppc_altivec_vsrw
9115       };
9116       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9117       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9118     }
9119 
9120     // vsplti + sra self.
9121     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9122       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9123       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9124         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9125         Intrinsic::ppc_altivec_vsraw
9126       };
9127       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9128       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9129     }
9130 
9131     // vsplti + rol self.
9132     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9133                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9134       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9135       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9136         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9137         Intrinsic::ppc_altivec_vrlw
9138       };
9139       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9140       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9141     }
9142 
9143     // t = vsplti c, result = vsldoi t, t, 1
9144     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9145       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9146       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9147       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9148     }
9149     // t = vsplti c, result = vsldoi t, t, 2
9150     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9151       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9152       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9153       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9154     }
9155     // t = vsplti c, result = vsldoi t, t, 3
9156     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9157       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9158       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9159       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9160     }
9161   }
9162 
9163   return SDValue();
9164 }
9165 
9166 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9167 /// the specified operations to build the shuffle.
9168 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9169                                       SDValue RHS, SelectionDAG &DAG,
9170                                       const SDLoc &dl) {
9171   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9172   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9173   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9174 
9175   enum {
9176     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9177     OP_VMRGHW,
9178     OP_VMRGLW,
9179     OP_VSPLTISW0,
9180     OP_VSPLTISW1,
9181     OP_VSPLTISW2,
9182     OP_VSPLTISW3,
9183     OP_VSLDOI4,
9184     OP_VSLDOI8,
9185     OP_VSLDOI12
9186   };
9187 
9188   if (OpNum == OP_COPY) {
9189     if (LHSID == (1*9+2)*9+3) return LHS;
9190     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9191     return RHS;
9192   }
9193 
9194   SDValue OpLHS, OpRHS;
9195   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9196   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9197 
9198   int ShufIdxs[16];
9199   switch (OpNum) {
9200   default: llvm_unreachable("Unknown i32 permute!");
9201   case OP_VMRGHW:
9202     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9203     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9204     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9205     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9206     break;
9207   case OP_VMRGLW:
9208     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9209     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9210     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9211     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9212     break;
9213   case OP_VSPLTISW0:
9214     for (unsigned i = 0; i != 16; ++i)
9215       ShufIdxs[i] = (i&3)+0;
9216     break;
9217   case OP_VSPLTISW1:
9218     for (unsigned i = 0; i != 16; ++i)
9219       ShufIdxs[i] = (i&3)+4;
9220     break;
9221   case OP_VSPLTISW2:
9222     for (unsigned i = 0; i != 16; ++i)
9223       ShufIdxs[i] = (i&3)+8;
9224     break;
9225   case OP_VSPLTISW3:
9226     for (unsigned i = 0; i != 16; ++i)
9227       ShufIdxs[i] = (i&3)+12;
9228     break;
9229   case OP_VSLDOI4:
9230     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9231   case OP_VSLDOI8:
9232     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9233   case OP_VSLDOI12:
9234     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9235   }
9236   EVT VT = OpLHS.getValueType();
9237   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9238   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9239   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9240   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9241 }
9242 
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2,  3,  4,  5,  6,  7,  8};

  ArrayRef<int> Mask = N->getMask();
  // Identity ordering of the 16 byte lanes, used to test whether every lane
  // other than the inserted one is left in place.
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for the VINSERTB
    // source element (7 for BE, 8 for LE) in the Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we need the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand
      // is undefined, in which case we always assume we're picking from the
      // 1st operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Shift V2 (VECSHL of a vector with itself) so the source byte lines up
    // with the insertion slot before the insert.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
9343 
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Packed-nibble encodings of the identity order: half-word i of V1 is
  // nibble i (0-7), half-word i of V2 is nibble i + 8 (8-15).
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Bits covering every nibble except the one for element i.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
9455 
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
/// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.
  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(SVN, 4, 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  // Commuting the shuffle also swaps which operand the mask refers to, so
  // refresh the mask from the commuted node.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(LHS, RHS);
    VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
    ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    // Words 0 and 2 are kept from LHS; words 1 and 3 come from the splat.
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    // Words 1 and 3 are kept from LHS; words 0 and 2 come from the splat.
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX by replicating the splat pattern up to 32 bits.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  SDValue SplatNode = DAG.getNode(
      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
9526 
9527 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9528 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9529 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9530 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9531 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9532   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9533   assert(Op.getValueType() == MVT::v1i128 &&
9534          "Only set v1i128 as custom, other type shouldn't reach here!");
9535   SDLoc dl(Op);
9536   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9537   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9538   unsigned SHLAmt = N1.getConstantOperandVal(0);
9539   if (SHLAmt % 8 == 0) {
9540     SmallVector<int, 16> Mask(16, 0);
9541     std::iota(Mask.begin(), Mask.end(), 0);
9542     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9543     if (SDValue Shuffle =
9544             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9545                                  DAG.getUNDEF(MVT::v16i8), Mask))
9546       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9547   }
9548   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9549   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9550                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9551   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9552                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9553   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9554   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9555 }
9556 
9557 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9558 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9559 /// return the code it can be lowered into.  Worst case, it can always be
9560 /// lowered into a vperm.
9561 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9562                                                SelectionDAG &DAG) const {
9563   SDLoc dl(Op);
9564   SDValue V1 = Op.getOperand(0);
9565   SDValue V2 = Op.getOperand(1);
9566   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9567 
9568   // Any nodes that were combined in the target-independent combiner prior
9569   // to vector legalization will not be sent to the target combine. Try to
9570   // combine it here.
9571   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9572     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9573       return NewShuffle;
9574     Op = NewShuffle;
9575     SVOp = cast<ShuffleVectorSDNode>(Op);
9576     V1 = Op.getOperand(0);
9577     V2 = Op.getOperand(1);
9578   }
9579   EVT VT = Op.getValueType();
9580   bool isLittleEndian = Subtarget.isLittleEndian();
9581 
9582   unsigned ShiftElts, InsertAtByte;
9583   bool Swap = false;
9584 
9585   // If this is a load-and-splat, we can do that with a single instruction
9586   // in some cases. However if the load has multiple uses, we don't want to
9587   // combine it because that will just produce multiple loads.
9588   bool IsPermutedLoad = false;
9589   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9590   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9591       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9592       InputLoad->hasOneUse()) {
9593     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9594     int SplatIdx =
9595       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9596 
9597     // The splat index for permuted loads will be in the left half of the vector
9598     // which is strictly wider than the loaded value by 8 bytes. So we need to
9599     // adjust the splat index to point to the correct address in memory.
9600     if (IsPermutedLoad) {
9601       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9602       SplatIdx += IsFourByte ? 2 : 1;
9603       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9604              "Splat of a value outside of the loaded memory");
9605     }
9606 
9607     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9608     // For 4-byte load-and-splat, we need Power9.
9609     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9610       uint64_t Offset = 0;
9611       if (IsFourByte)
9612         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9613       else
9614         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9615 
9616       SDValue BasePtr = LD->getBasePtr();
9617       if (Offset != 0)
9618         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9619                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9620       SDValue Ops[] = {
9621         LD->getChain(),    // Chain
9622         BasePtr,           // BasePtr
9623         DAG.getValueType(Op.getValueType()) // VT
9624       };
9625       SDVTList VTL =
9626         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9627       SDValue LdSplt =
9628         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9629                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
9630       if (LdSplt.getValueType() != SVOp->getValueType(0))
9631         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9632       return LdSplt;
9633     }
9634   }
9635   if (Subtarget.hasP9Vector() &&
9636       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9637                            isLittleEndian)) {
9638     if (Swap)
9639       std::swap(V1, V2);
9640     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9641     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9642     if (ShiftElts) {
9643       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9644                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
9645       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9646                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9647       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9648     }
9649     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9650                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9651     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9652   }
9653 
9654   if (Subtarget.hasPrefixInstrs()) {
9655     SDValue SplatInsertNode;
9656     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9657       return SplatInsertNode;
9658   }
9659 
9660   if (Subtarget.hasP9Altivec()) {
9661     SDValue NewISDNode;
9662     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9663       return NewISDNode;
9664 
9665     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9666       return NewISDNode;
9667   }
9668 
9669   if (Subtarget.hasVSX() &&
9670       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9671     if (Swap)
9672       std::swap(V1, V2);
9673     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9674     SDValue Conv2 =
9675         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9676 
9677     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9678                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9679     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9680   }
9681 
9682   if (Subtarget.hasVSX() &&
9683     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9684     if (Swap)
9685       std::swap(V1, V2);
9686     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9687     SDValue Conv2 =
9688         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9689 
9690     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9691                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9692     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9693   }
9694 
9695   if (Subtarget.hasP9Vector()) {
9696      if (PPC::isXXBRHShuffleMask(SVOp)) {
9697       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9698       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9699       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9700     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9701       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9702       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9703       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9704     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9705       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9706       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9707       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9708     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9709       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9710       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9711       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9712     }
9713   }
9714 
9715   if (Subtarget.hasVSX()) {
9716     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9717       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9718 
9719       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9720       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9721                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
9722       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9723     }
9724 
9725     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9726     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9727       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9728       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9729       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9730     }
9731   }
9732 
9733   // Cases that are handled by instructions that take permute immediates
9734   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9735   // selected by the instruction selector.
9736   if (V2.isUndef()) {
9737     if (PPC::isSplatShuffleMask(SVOp, 1) ||
9738         PPC::isSplatShuffleMask(SVOp, 2) ||
9739         PPC::isSplatShuffleMask(SVOp, 4) ||
9740         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9741         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9742         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9743         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9744         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9745         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9746         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9747         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9748         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9749         (Subtarget.hasP8Altivec() && (
9750          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9751          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9752          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9753       return Op;
9754     }
9755   }
9756 
9757   // Altivec has a variety of "shuffle immediates" that take two vector inputs
9758   // and produce a fixed permutation.  If any of these match, do not lower to
9759   // VPERM.
9760   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9761   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9762       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9763       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9764       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9765       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9766       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9767       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9768       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9769       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9770       (Subtarget.hasP8Altivec() && (
9771        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9772        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9773        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9774     return Op;
9775 
9776   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
9777   // perfect shuffle table to emit an optimal matching sequence.
9778   ArrayRef<int> PermMask = SVOp->getMask();
9779 
9780   unsigned PFIndexes[4];
9781   bool isFourElementShuffle = true;
9782   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9783     unsigned EltNo = 8;   // Start out undef.
9784     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
9785       if (PermMask[i*4+j] < 0)
9786         continue;   // Undef, ignore it.
9787 
9788       unsigned ByteSource = PermMask[i*4+j];
9789       if ((ByteSource & 3) != j) {
9790         isFourElementShuffle = false;
9791         break;
9792       }
9793 
9794       if (EltNo == 8) {
9795         EltNo = ByteSource/4;
9796       } else if (EltNo != ByteSource/4) {
9797         isFourElementShuffle = false;
9798         break;
9799       }
9800     }
9801     PFIndexes[i] = EltNo;
9802   }
9803 
9804   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9805   // perfect shuffle vector to determine if it is cost effective to do this as
9806   // discrete instructions, or whether we should use a vperm.
9807   // For now, we skip this for little endian until such time as we have a
9808   // little-endian perfect shuffle table.
9809   if (isFourElementShuffle && !isLittleEndian) {
9810     // Compute the index in the perfect shuffle table.
9811     unsigned PFTableIndex =
9812       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9813 
9814     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9815     unsigned Cost  = (PFEntry >> 30);
9816 
9817     // Determining when to avoid vperm is tricky.  Many things affect the cost
9818     // of vperm, particularly how many times the perm mask needs to be computed.
9819     // For example, if the perm mask can be hoisted out of a loop or is already
9820     // used (perhaps because there are multiple permutes with the same shuffle
9821     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
9822     // the loop requires an extra register.
9823     //
9824     // As a compromise, we only emit discrete instructions if the shuffle can be
9825     // generated in 3 or fewer operations.  When we have loop information
9826     // available, if this block is within a loop, we should avoid using vperm
9827     // for 3-operation perms and use a constant pool load instead.
9828     if (Cost < 3)
9829       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9830   }
9831 
9832   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9833   // vector that will get spilled to the constant pool.
9834   if (V2.isUndef()) V2 = V1;
9835 
9836   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9837   // that it is in input element units, not in bytes.  Convert now.
9838 
9839   // For little endian, the order of the input vectors is reversed, and
9840   // the permutation mask is complemented with respect to 31.  This is
9841   // necessary to produce proper semantics with the big-endian-biased vperm
9842   // instruction.
9843   EVT EltVT = V1.getValueType().getVectorElementType();
9844   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9845 
9846   SmallVector<SDValue, 16> ResultMask;
9847   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9848     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9849 
9850     for (unsigned j = 0; j != BytesPerElement; ++j)
9851       if (isLittleEndian)
9852         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9853                                              dl, MVT::i32));
9854       else
9855         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9856                                              MVT::i32));
9857   }
9858 
9859   ShufflesHandledWithVPERM++;
9860   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9861   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
9862   LLVM_DEBUG(SVOp->dump());
9863   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
9864   LLVM_DEBUG(VPermMask.dump());
9865 
9866   if (isLittleEndian)
9867     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9868                        V2, V1, VPermMask);
9869   else
9870     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9871                        V1, V2, VPermMask);
9872 }
9873 
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
///
/// \param Intrin     The intrinsic node; operand 0 holds the intrinsic ID.
/// \param CompareOpc [out] Numeric sub-opcode of the matching vector-compare
///                   instruction (set to -1 first, then overwritten on match).
/// \param isDot      [out] True for the "_p" predicate forms, which are
///                   lowered to the record ("dot") form of the compare.
/// \param Subtarget  Used to reject compares the current CPU cannot do
///                   (P8 Altivec doubleword forms, P9 Altivec vcmpne*, VSX).
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  // Doubleword equality compare is only available with P8 Altivec.
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  // The vcmpne* / vcmpnez* family is only available with P9 Altivec.
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  // Signed doubleword greater-than requires P8 Altivec.
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  // Unsigned doubleword greater-than requires P8 Altivec.
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons. These mirror the predicate forms above but leave
  // isDot false (no CR6 update is wanted).
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  }
  return true;
}
10120 
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
///
/// Handles llvm.thread.pointer and the altivec/VSX vector-compare
/// intrinsics recognized by getVectorCompareInfo(); everything else
/// returns SDValue() so the default handling applies.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  SDLoc dl(Op);

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    // X13 on 64-bit targets, R2 on 32-bit targets.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  // The result vector is bitcast back to the intrinsic's declared type.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node. For dot intrinsics the
  // first operand is the predicate selector, so LHS/RHS are operands 2 and 3.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1));

  // Unpack the result based on how the target uses it. Operand 1 of the
  // intrinsic selects which CR6 bit to return and whether to invert it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
10199 
10200 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10201                                                SelectionDAG &DAG) const {
10202   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10203   // the beginning of the argument list.
10204   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10205   SDLoc DL(Op);
10206   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10207   case Intrinsic::ppc_cfence: {
10208     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10209     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10210     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10211                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10212                                                   Op.getOperand(ArgStart + 1)),
10213                                       Op.getOperand(0)),
10214                    0);
10215   }
10216   default:
10217     break;
10218   }
10219   return SDValue();
10220 }
10221 
10222 // Lower scalar BSWAP64 to xxbrd.
10223 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10224   SDLoc dl(Op);
10225   // MTVSRDD
10226   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10227                    Op.getOperand(0));
10228   // XXBRD
10229   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10230   // MFVSRD
10231   int VectorIndex = 0;
10232   if (Subtarget.isLittleEndian())
10233     VectorIndex = 1;
10234   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10235                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10236   return Op;
10237 }
10238 
10239 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10240 // compared to a value that is atomically loaded (atomic loads zero-extend).
10241 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10242                                                 SelectionDAG &DAG) const {
10243   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10244          "Expecting an atomic compare-and-swap here.");
10245   SDLoc dl(Op);
10246   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10247   EVT MemVT = AtomicNode->getMemoryVT();
10248   if (MemVT.getSizeInBits() >= 32)
10249     return Op;
10250 
10251   SDValue CmpOp = Op.getOperand(2);
10252   // If this is already correctly zero-extended, leave it alone.
10253   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10254   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10255     return Op;
10256 
10257   // Clear the high bits of the compare operand.
10258   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10259   SDValue NewCmpOp =
10260     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10261                 DAG.getConstant(MaskVal, dl, MVT::i32));
10262 
10263   // Replace the existing compare operand with the properly zero-extended one.
10264   SmallVector<SDValue, 4> Ops;
10265   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10266     Ops.push_back(AtomicNode->getOperand(i));
10267   Ops[2] = NewCmpOp;
10268   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10269   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10270   auto NodeTy =
10271     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10272   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10273 }
10274 
10275 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10276                                                  SelectionDAG &DAG) const {
10277   SDLoc dl(Op);
10278   // Create a stack slot that is 16-byte aligned.
10279   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10280   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10281   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10282   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10283 
10284   // Store the input value into Value#0 of the stack slot.
10285   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10286                                MachinePointerInfo());
10287   // Load it out.
10288   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10289 }
10290 
10291 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10292                                                   SelectionDAG &DAG) const {
10293   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10294          "Should only be called for ISD::INSERT_VECTOR_ELT");
10295 
10296   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10297   // We have legal lowering for constant indices but not for variable ones.
10298   if (!C)
10299     return SDValue();
10300 
10301   EVT VT = Op.getValueType();
10302   SDLoc dl(Op);
10303   SDValue V1 = Op.getOperand(0);
10304   SDValue V2 = Op.getOperand(1);
10305   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10306   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10307     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10308     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10309     unsigned InsertAtElement = C->getZExtValue();
10310     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10311     if (Subtarget.isLittleEndian()) {
10312       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10313     }
10314     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10315                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10316   }
10317   return Op;
10318 }
10319 
// Lower vector multiplies that have no single-instruction form: v4i32 via
// 16-bit halfword multiplies, v16i8 via even/odd 8-bit multiplies merged
// back together.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // Multiply-sum against the halfword-rotated RHS to collect the cross
    // terms that belong in the upper half of each 32-bit product.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    // Final product = low-half products + shifted cross terms.
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        // LE: take even-numbered bytes from each 16-bit product.
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        // BE: take odd-numbered bytes from each 16-bit product.
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    // Note the operand order also swaps for little endian.
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
10383 
10384 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10385 
10386   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10387 
10388   EVT VT = Op.getValueType();
10389   assert(VT.isVector() &&
10390          "Only set vector abs as custom, scalar abs shouldn't reach here!");
10391   assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10392           VT == MVT::v16i8) &&
10393          "Unexpected vector element type!");
10394   assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10395          "Current subtarget doesn't support smax v2i64!");
10396 
10397   // For vector abs, it can be lowered to:
10398   // abs x
10399   // ==>
10400   // y = -x
10401   // smax(x, y)
10402 
10403   SDLoc dl(Op);
10404   SDValue X = Op.getOperand(0);
10405   SDValue Zero = DAG.getConstant(0, dl, VT);
10406   SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10407 
10408   // SMAX patch https://reviews.llvm.org/D47332
10409   // hasn't landed yet, so use intrinsic first here.
10410   // TODO: Should use SMAX directly once SMAX patch landed
10411   Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10412   if (VT == MVT::v2i64)
10413     BifID = Intrinsic::ppc_altivec_vmaxsd;
10414   else if (VT == MVT::v8i16)
10415     BifID = Intrinsic::ppc_altivec_vmaxsh;
10416   else if (VT == MVT::v16i8)
10417     BifID = Intrinsic::ppc_altivec_vmaxsb;
10418 
10419   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10420 }
10421 
10422 // Custom lowering for fpext vf32 to v2f64
10423 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10424 
10425   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10426          "Should only be called for ISD::FP_EXTEND");
10427 
10428   // FIXME: handle extends from half precision float vectors on P9.
10429   // We only want to custom lower an extend from v2f32 to v2f64.
10430   if (Op.getValueType() != MVT::v2f64 ||
10431       Op.getOperand(0).getValueType() != MVT::v2f32)
10432     return SDValue();
10433 
10434   SDLoc dl(Op);
10435   SDValue Op0 = Op.getOperand(0);
10436 
10437   switch (Op0.getOpcode()) {
10438   default:
10439     return SDValue();
10440   case ISD::EXTRACT_SUBVECTOR: {
10441     assert(Op0.getNumOperands() == 2 &&
10442            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10443            "Node should have 2 operands with second one being a constant!");
10444 
10445     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10446       return SDValue();
10447 
10448     // Custom lower is only done for high or low doubleword.
10449     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10450     if (Idx % 2 != 0)
10451       return SDValue();
10452 
10453     // Since input is v4f32, at this point Idx is either 0 or 2.
10454     // Shift to get the doubleword position we want.
10455     int DWord = Idx >> 1;
10456 
10457     // High and low word positions are different on little endian.
10458     if (Subtarget.isLittleEndian())
10459       DWord ^= 0x1;
10460 
10461     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10462                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10463   }
10464   case ISD::FADD:
10465   case ISD::FMUL:
10466   case ISD::FSUB: {
10467     SDValue NewLoad[2];
10468     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10469       // Ensure both input are loads.
10470       SDValue LdOp = Op0.getOperand(i);
10471       if (LdOp.getOpcode() != ISD::LOAD)
10472         return SDValue();
10473       // Generate new load node.
10474       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10475       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10476       NewLoad[i] = DAG.getMemIntrinsicNode(
10477           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10478           LD->getMemoryVT(), LD->getMemOperand());
10479     }
10480     SDValue NewOp =
10481         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10482                     NewLoad[1], Op0.getNode()->getFlags());
10483     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10484                        DAG.getConstant(0, dl, MVT::i32));
10485   }
10486   case ISD::LOAD: {
10487     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10488     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10489     SDValue NewLd = DAG.getMemIntrinsicNode(
10490         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10491         LD->getMemoryVT(), LD->getMemOperand());
10492     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10493                        DAG.getConstant(0, dl, MVT::i32));
10494   }
10495   }
10496   llvm_unreachable("ERROR:Should return for all cases within swtich.");
10497 }
10498 
10499 /// LowerOperation - Provide custom lowering hooks for some operations.
10500 ///
10501 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10502   switch (Op.getOpcode()) {
10503   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10504   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10505   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10506   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10507   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10508   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10509   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10510   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10511   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10512 
10513   // Variable argument lowering.
10514   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10515   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10516   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10517 
10518   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
10519   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10520   case ISD::GET_DYNAMIC_AREA_OFFSET:
10521     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10522 
10523   // Exception handling lowering.
10524   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
10525   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
10526   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
10527 
10528   case ISD::LOAD:               return LowerLOAD(Op, DAG);
10529   case ISD::STORE:              return LowerSTORE(Op, DAG);
10530   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
10531   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
10532   case ISD::STRICT_FP_TO_UINT:
10533   case ISD::STRICT_FP_TO_SINT:
10534   case ISD::FP_TO_UINT:
10535   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10536   case ISD::UINT_TO_FP:
10537   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
10538   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
10539 
10540   // Lower 64-bit shifts.
10541   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
10542   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
10543   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
10544 
10545   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
10546   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
10547 
10548   // Vector-related lowering.
10549   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
10550   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
10551   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10552   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
10553   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
10554   case ISD::MUL:                return LowerMUL(Op, DAG);
10555   case ISD::ABS:                return LowerABS(Op, DAG);
10556   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
10557   case ISD::ROTL:               return LowerROTL(Op, DAG);
10558 
10559   // For counter-based loop handling.
10560   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
10561 
10562   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
10563 
10564   // Frame & Return address.
10565   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
10566   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
10567 
10568   case ISD::INTRINSIC_VOID:
10569     return LowerINTRINSIC_VOID(Op, DAG);
10570   case ISD::BSWAP:
10571     return LowerBSWAP(Op, DAG);
10572   case ISD::ATOMIC_CMP_SWAP:
10573     return LowerATOMIC_CMP_SWAP(Op, DAG);
10574   }
10575 }
10576 
// Custom type legalization: replace the illegal-typed results of N with
// legal equivalents, appending the replacement values to Results.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // Read the time base as two i32 halves and BUILD_PAIR them into the
    // i64 result; the chain is the node's third value.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the loop_decrement intrinsic needs custom result legalization.
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    // Re-issue the intrinsic with the legal setcc result type, then
    // truncate back down to the original i1.
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Only 32-bit SVR4 has a custom VAARG; everything else legalizes it
    // generically.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      // NOTE(review): this passes result #1 of the VAARG node
      // (SDValue(N, 1)) to LowerVAARG rather than result #0 — confirm
      // this is intentional.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    // For strict opcodes, operand 0 is the chain and operand 1 is the value.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    // Custom-lower vector truncates whose source is at most 128 bits wide
    // with power-of-2 element sizes; otherwise leave it to the default
    // legalizer (no result is pushed).
    EVT TrgVT = N->getValueType(0);
    EVT OpVT = N->getOperand(0).getValueType();
    if (TrgVT.isVector() &&
        isOperationCustom(N->getOpcode(), TrgVT) &&
        OpVT.getSizeInBits() <= 128 &&
        isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    // LowerFP_EXTEND returns a null SDValue when it declines to custom
    // lower; only push a result when it produced one.
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
10654 
10655 //===----------------------------------------------------------------------===//
10656 //  Other Lowering Code
10657 //===----------------------------------------------------------------------===//
10658 
10659 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10660   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10661   Function *Func = Intrinsic::getDeclaration(M, Id);
10662   return Builder.CreateCall(Func, {});
10663 }
10664 
10665 // The mappings for emitLeading/TrailingFence is taken from
10666 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10667 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10668                                                  Instruction *Inst,
10669                                                  AtomicOrdering Ord) const {
10670   if (Ord == AtomicOrdering::SequentiallyConsistent)
10671     return callIntrinsic(Builder, Intrinsic::ppc_sync);
10672   if (isReleaseOrStronger(Ord))
10673     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10674   return nullptr;
10675 }
10676 
10677 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10678                                                   Instruction *Inst,
10679                                                   AtomicOrdering Ord) const {
10680   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10681     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10682     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10683     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10684     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10685       return Builder.CreateCall(
10686           Intrinsic::getDeclaration(
10687               Builder.GetInsertBlock()->getParent()->getParent(),
10688               Intrinsic::ppc_cfence, {Inst->getType()}),
10689           {Inst});
10690     // FIXME: Can use isync for rmw operation.
10691     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10692   }
10693   return nullptr;
10694 }
10695 
10696 MachineBasicBlock *
10697 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10698                                     unsigned AtomicSize,
10699                                     unsigned BinOpcode,
10700                                     unsigned CmpOpcode,
10701                                     unsigned CmpPred) const {
10702   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10703   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10704 
10705   auto LoadMnemonic = PPC::LDARX;
10706   auto StoreMnemonic = PPC::STDCX;
10707   switch (AtomicSize) {
10708   default:
10709     llvm_unreachable("Unexpected size of atomic entity");
10710   case 1:
10711     LoadMnemonic = PPC::LBARX;
10712     StoreMnemonic = PPC::STBCX;
10713     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10714     break;
10715   case 2:
10716     LoadMnemonic = PPC::LHARX;
10717     StoreMnemonic = PPC::STHCX;
10718     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10719     break;
10720   case 4:
10721     LoadMnemonic = PPC::LWARX;
10722     StoreMnemonic = PPC::STWCX;
10723     break;
10724   case 8:
10725     LoadMnemonic = PPC::LDARX;
10726     StoreMnemonic = PPC::STDCX;
10727     break;
10728   }
10729 
10730   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10731   MachineFunction *F = BB->getParent();
10732   MachineFunction::iterator It = ++BB->getIterator();
10733 
10734   Register dest = MI.getOperand(0).getReg();
10735   Register ptrA = MI.getOperand(1).getReg();
10736   Register ptrB = MI.getOperand(2).getReg();
10737   Register incr = MI.getOperand(3).getReg();
10738   DebugLoc dl = MI.getDebugLoc();
10739 
10740   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10741   MachineBasicBlock *loop2MBB =
10742     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10743   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10744   F->insert(It, loopMBB);
10745   if (CmpOpcode)
10746     F->insert(It, loop2MBB);
10747   F->insert(It, exitMBB);
10748   exitMBB->splice(exitMBB->begin(), BB,
10749                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
10750   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10751 
10752   MachineRegisterInfo &RegInfo = F->getRegInfo();
10753   Register TmpReg = (!BinOpcode) ? incr :
10754     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10755                                            : &PPC::GPRCRegClass);
10756 
10757   //  thisMBB:
10758   //   ...
10759   //   fallthrough --> loopMBB
10760   BB->addSuccessor(loopMBB);
10761 
10762   //  loopMBB:
10763   //   l[wd]arx dest, ptr
10764   //   add r0, dest, incr
10765   //   st[wd]cx. r0, ptr
10766   //   bne- loopMBB
10767   //   fallthrough --> exitMBB
10768 
10769   // For max/min...
10770   //  loopMBB:
10771   //   l[wd]arx dest, ptr
10772   //   cmpl?[wd] incr, dest
10773   //   bgt exitMBB
10774   //  loop2MBB:
10775   //   st[wd]cx. dest, ptr
10776   //   bne- loopMBB
10777   //   fallthrough --> exitMBB
10778 
10779   BB = loopMBB;
10780   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10781     .addReg(ptrA).addReg(ptrB);
10782   if (BinOpcode)
10783     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10784   if (CmpOpcode) {
10785     // Signed comparisons of byte or halfword values must be sign-extended.
10786     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10787       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10788       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10789               ExtReg).addReg(dest);
10790       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10791         .addReg(incr).addReg(ExtReg);
10792     } else
10793       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10794         .addReg(incr).addReg(dest);
10795 
10796     BuildMI(BB, dl, TII->get(PPC::BCC))
10797       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10798     BB->addSuccessor(loop2MBB);
10799     BB->addSuccessor(exitMBB);
10800     BB = loop2MBB;
10801   }
10802   BuildMI(BB, dl, TII->get(StoreMnemonic))
10803     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10804   BuildMI(BB, dl, TII->get(PPC::BCC))
10805     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10806   BB->addSuccessor(loopMBB);
10807   BB->addSuccessor(exitMBB);
10808 
10809   //  exitMBB:
10810   //   ...
10811   BB = exitMBB;
10812   return BB;
10813 }
10814 
// Emit a byte/halfword atomic RMW for targets without native partword
// load-reserve instructions: the naturally aligned word containing the
// operand is updated via a masked lwarx/stwcx. loop, and the result is
// shifted back down into the low bits of `dest`.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  // Everything after MI moves into the exit block.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little endian the computed bit offset is already the shift amount,
  // so ShiftReg can alias Shift1Reg (no XORI adjustment is emitted below).
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  // TmpReg holds the (shifted) value to merge into the word: the binop
  // result, or the shifted `incr` itself for ATOMIC_SWAP (BinOpcode==0).
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Shift the masked value down and sign-extend it so the signed
      // compare sees the true partword value; compare against the
      // unshifted `incr` in that case.
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    // Skip the store when the comparison already holds.
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  // Store conditionally; retry the whole loop if the reservation was lost.
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Shift the original (pre-update) word back down so the low bits of
  // `dest` contain the old partword value.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
11007 
// Expand the EH_SjLj_SETJMP pseudo into a three-block diamond: thisMBB
// saves the base pointer (and TOC on 64-bit ELF) into the buffer and sets
// the "returned via longjmp" value 1; mainMBB saves the resume address
// (LR) and produces 0; sinkMBB merges the two through a PHI into DstReg.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  // Buffer slot offsets, in pointer-sized units (see layout comment above).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  // On 64-bit ELF, save the TOC pointer (X2) into slot four of the buffer.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  // Branch-and-link to mainMBB; the call-like branch clobbers everything
  // (no preserved registers), matching a longjmp re-entry.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  // Value observed on the longjmp-return path.
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  // Capture the label address from LR (set by the BCLalways above).
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Value observed on the direct (first-call) path.
  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  // Merge 0 (via mainMBB) and 1 (via thisMBB) into the original result.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
11149 
11150 MachineBasicBlock *
11151 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11152                                      MachineBasicBlock *MBB) const {
11153   DebugLoc DL = MI.getDebugLoc();
11154   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11155 
11156   MachineFunction *MF = MBB->getParent();
11157   MachineRegisterInfo &MRI = MF->getRegInfo();
11158 
11159   MVT PVT = getPointerTy(MF->getDataLayout());
11160   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11161          "Invalid Pointer Size!");
11162 
11163   const TargetRegisterClass *RC =
11164     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11165   Register Tmp = MRI.createVirtualRegister(RC);
11166   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11167   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11168   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11169   unsigned BP =
11170       (PVT == MVT::i64)
11171           ? PPC::X30
11172           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11173                                                               : PPC::R30);
11174 
11175   MachineInstrBuilder MIB;
11176 
11177   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11178   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11179   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11180   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11181 
11182   Register BufReg = MI.getOperand(0).getReg();
11183 
11184   // Reload FP (the jumped-to function may not have had a
11185   // frame pointer, and if so, then its r31 will be restored
11186   // as necessary).
11187   if (PVT == MVT::i64) {
11188     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11189             .addImm(0)
11190             .addReg(BufReg);
11191   } else {
11192     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11193             .addImm(0)
11194             .addReg(BufReg);
11195   }
11196   MIB.cloneMemRefs(MI);
11197 
11198   // Reload IP
11199   if (PVT == MVT::i64) {
11200     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11201             .addImm(LabelOffset)
11202             .addReg(BufReg);
11203   } else {
11204     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11205             .addImm(LabelOffset)
11206             .addReg(BufReg);
11207   }
11208   MIB.cloneMemRefs(MI);
11209 
11210   // Reload SP
11211   if (PVT == MVT::i64) {
11212     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11213             .addImm(SPOffset)
11214             .addReg(BufReg);
11215   } else {
11216     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11217             .addImm(SPOffset)
11218             .addReg(BufReg);
11219   }
11220   MIB.cloneMemRefs(MI);
11221 
11222   // Reload BP
11223   if (PVT == MVT::i64) {
11224     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11225             .addImm(BPOffset)
11226             .addReg(BufReg);
11227   } else {
11228     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11229             .addImm(BPOffset)
11230             .addReg(BufReg);
11231   }
11232   MIB.cloneMemRefs(MI);
11233 
11234   // Reload TOC
11235   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11236     setUsesTOCBasePtr(*MBB->getParent());
11237     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11238               .addImm(TOCOffset)
11239               .addReg(BufReg)
11240               .cloneMemRefs(MI);
11241   }
11242 
11243   // Jump
11244   BuildMI(*MBB, MI, DL,
11245           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11246   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11247 
11248   MI.eraseFromParent();
11249   return MBB;
11250 }
11251 
11252 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11253   // If the function specifically requests inline stack probes, emit them.
11254   if (MF.getFunction().hasFnAttribute("probe-stack"))
11255     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11256            "inline-asm";
11257   return false;
11258 }
11259 
11260 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11261   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11262   unsigned StackAlign = TFI->getStackAlignment();
11263   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11264          "Unexpected stack alignment");
11265   // The default stack probe size is 4096 if the function has no
11266   // stack-probe-size attribute.
11267   unsigned StackProbeSize = 4096;
11268   const Function &Fn = MF.getFunction();
11269   if (Fn.hasFnAttribute("stack-probe-size"))
11270     Fn.getFnAttribute("stack-probe-size")
11271         .getValueAsString()
11272         .getAsInteger(0, StackProbeSize);
11273   // Round down to the stack alignment.
11274   StackProbeSize &= ~(StackAlign - 1);
11275   return StackProbeSize ? StackProbeSize : StackAlign;
11276 }
11277 
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop for probing
// blocks. At last, it uses the pseudo instruction DYNAREAOFFSET to get the
// future result of MaxCallFrameSize so that it can calculate the correct
// data area pointer.
MachineBasicBlock *
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  // Probe stride, derived from the "stack-probe-size" attribute (4096 default).
  const unsigned ProbeSize = getStackProbeSize(*MF);
  const BasicBlock *ProbedBB = MBB->getBasicBlock();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG of probing stack looks as
  //         +-----+
  //         | MBB |
  //         +--+--+
  //            |
  //       +----v----+
  //  +--->+ TestMBB +---+
  //  |    +----+----+   |
  //  |         |        |
  //  |   +-----v----+   |
  //  +---+ BlockMBB |   |
  //      +----------+   |
  //                     |
  //       +---------+   |
  //       | TailMBB +<--+
  //       +---------+
  // In MBB, calculate previous frame pointer and final stack pointer.
  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
  // TailMBB is spliced via \p MI.
  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);

  // Insert the new blocks right after MBB so the layout order is
  // MBB -> TestMBB -> BlockMBB -> TailMBB.
  MachineFunction::iterator MBBIter = ++MBB->getIterator();
  MF->insert(MBBIter, TestMBB);
  MF->insert(MBBIter, BlockMBB);
  MF->insert(MBBIter, TailMBB);

  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register DstReg = MI.getOperand(0).getReg();
  Register NegSizeReg = MI.getOperand(1).getReg();
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

  // Since the value of NegSizeReg might be realigned in the prologue/epilogue
  // inserter, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
  // actual FramePointer and NegSize.
  unsigned ProbeOpc;
  if (!MRI.hasOneNonDBGUse(NegSizeReg))
    ProbeOpc =
        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
  else
    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
    // and NegSizeReg will be allocated in the same phyreg to avoid
    // redundant copy when NegSizeReg has only one use which is current MI and
    // will be replaced by PREPARE_PROBED_ALLOCA then.
    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
      .addDef(ActualNegSizeReg)
      .addReg(NegSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));

  // Calculate final stack pointer, which equals to SP + ActualNegSize.
  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
          FinalStackPtr)
      .addReg(SPReg)
      .addReg(ActualNegSizeReg);

  // Materialize -ProbeSize into a scratch register for the SP updates below.
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  if (!isInt<16>(NegProbeSize)) {
    // -ProbeSize does not fit a 16-bit immediate; build it with LIS + ORI.
    Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
        .addImm(NegProbeSize >> 16);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
            ScratchReg)
        .addReg(TempReg)
        .addImm(NegProbeSize & 0xFFFF);
  } else
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
        .addImm(NegProbeSize);

  {
    // Probing leading residual part (ActualNegSize modulo ProbeSize), so the
    // amount left for the loop is an exact multiple of ProbeSize.
    Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
        .addReg(ActualNegSizeReg)
        .addReg(ScratchReg);
    Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
        .addReg(Div)
        .addReg(ScratchReg);
    Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
        .addReg(Mul)
        .addReg(ActualNegSizeReg);
    // Store-with-update advances SP by NegMod while touching the new page.
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(NegMod);
  }

  {
    // Remaining part should be multiple of ProbeSize; exit the loop once SP
    // reaches FinalStackPtr.
    Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
        .addReg(SPReg)
        .addReg(FinalStackPtr);
    BuildMI(TestMBB, DL, TII->get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CmpResult)
        .addMBB(TailMBB);
    TestMBB->addSuccessor(BlockMBB);
    TestMBB->addSuccessor(TailMBB);
  }

  {
    // Touch the block: one store-with-update of -ProbeSize per iteration.
    // |P...|P...|P...
    BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(ScratchReg);
    BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
    BlockMBB->addSuccessor(TestMBB);
  }

  // Calculation of MaxCallFrameSize is deferred to the prologue/epilogue
  // inserter; use the DYNAREAOFFSET pseudo instruction to get the future
  // result.
  Register MaxCallFrameSizeReg =
      MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  BuildMI(TailMBB, DL,
          TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
          MaxCallFrameSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));
  // The returned data area pointer is SP plus the (future) MaxCallFrameSize.
  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
      .addReg(SPReg)
      .addReg(MaxCallFrameSizeReg);

  // Splice instructions after MI to TailMBB.
  TailMBB->splice(TailMBB->end(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(TestMBB);

  // Delete the pseudo instruction.
  MI.eraseFromParent();

  ++NumDynamicAllocaProbed;
  return TailMBB;
}
11445 
11446 MachineBasicBlock *
11447 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11448                                                MachineBasicBlock *BB) const {
11449   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11450       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11451     if (Subtarget.is64BitELFABI() &&
11452         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11453         !Subtarget.isUsingPCRelativeCalls()) {
11454       // Call lowering should have added an r2 operand to indicate a dependence
11455       // on the TOC base pointer value. It can't however, because there is no
11456       // way to mark the dependence as implicit there, and so the stackmap code
11457       // will confuse it with a regular operand. Instead, add the dependence
11458       // here.
11459       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11460     }
11461 
11462     return emitPatchPoint(MI, BB);
11463   }
11464 
11465   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11466       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11467     return emitEHSjLjSetJmp(MI, BB);
11468   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11469              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11470     return emitEHSjLjLongJmp(MI, BB);
11471   }
11472 
11473   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11474 
11475   // To "insert" these instructions we actually have to insert their
11476   // control-flow patterns.
11477   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11478   MachineFunction::iterator It = ++BB->getIterator();
11479 
11480   MachineFunction *F = BB->getParent();
11481 
11482   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11483       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11484       MI.getOpcode() == PPC::SELECT_I8) {
11485     SmallVector<MachineOperand, 2> Cond;
11486     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11487         MI.getOpcode() == PPC::SELECT_CC_I8)
11488       Cond.push_back(MI.getOperand(4));
11489     else
11490       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11491     Cond.push_back(MI.getOperand(1));
11492 
11493     DebugLoc dl = MI.getDebugLoc();
11494     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11495                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11496   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11497              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11498              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11499              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11500              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11501              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11502              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11503              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11504              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11505              MI.getOpcode() == PPC::SELECT_F4 ||
11506              MI.getOpcode() == PPC::SELECT_F8 ||
11507              MI.getOpcode() == PPC::SELECT_F16 ||
11508              MI.getOpcode() == PPC::SELECT_SPE ||
11509              MI.getOpcode() == PPC::SELECT_SPE4 ||
11510              MI.getOpcode() == PPC::SELECT_VRRC ||
11511              MI.getOpcode() == PPC::SELECT_VSFRC ||
11512              MI.getOpcode() == PPC::SELECT_VSSRC ||
11513              MI.getOpcode() == PPC::SELECT_VSRC) {
11514     // The incoming instruction knows the destination vreg to set, the
11515     // condition code register to branch on, the true/false values to
11516     // select between, and a branch opcode to use.
11517 
11518     //  thisMBB:
11519     //  ...
11520     //   TrueVal = ...
11521     //   cmpTY ccX, r1, r2
11522     //   bCC copy1MBB
11523     //   fallthrough --> copy0MBB
11524     MachineBasicBlock *thisMBB = BB;
11525     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11526     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11527     DebugLoc dl = MI.getDebugLoc();
11528     F->insert(It, copy0MBB);
11529     F->insert(It, sinkMBB);
11530 
11531     // Transfer the remainder of BB and its successor edges to sinkMBB.
11532     sinkMBB->splice(sinkMBB->begin(), BB,
11533                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11534     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11535 
11536     // Next, add the true and fallthrough blocks as its successors.
11537     BB->addSuccessor(copy0MBB);
11538     BB->addSuccessor(sinkMBB);
11539 
11540     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11541         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11542         MI.getOpcode() == PPC::SELECT_F16 ||
11543         MI.getOpcode() == PPC::SELECT_SPE4 ||
11544         MI.getOpcode() == PPC::SELECT_SPE ||
11545         MI.getOpcode() == PPC::SELECT_VRRC ||
11546         MI.getOpcode() == PPC::SELECT_VSFRC ||
11547         MI.getOpcode() == PPC::SELECT_VSSRC ||
11548         MI.getOpcode() == PPC::SELECT_VSRC) {
11549       BuildMI(BB, dl, TII->get(PPC::BC))
11550           .addReg(MI.getOperand(1).getReg())
11551           .addMBB(sinkMBB);
11552     } else {
11553       unsigned SelectPred = MI.getOperand(4).getImm();
11554       BuildMI(BB, dl, TII->get(PPC::BCC))
11555           .addImm(SelectPred)
11556           .addReg(MI.getOperand(1).getReg())
11557           .addMBB(sinkMBB);
11558     }
11559 
11560     //  copy0MBB:
11561     //   %FalseValue = ...
11562     //   # fallthrough to sinkMBB
11563     BB = copy0MBB;
11564 
11565     // Update machine-CFG edges
11566     BB->addSuccessor(sinkMBB);
11567 
11568     //  sinkMBB:
11569     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11570     //  ...
11571     BB = sinkMBB;
11572     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11573         .addReg(MI.getOperand(3).getReg())
11574         .addMBB(copy0MBB)
11575         .addReg(MI.getOperand(2).getReg())
11576         .addMBB(thisMBB);
11577   } else if (MI.getOpcode() == PPC::ReadTB) {
11578     // To read the 64-bit time-base register on a 32-bit target, we read the
11579     // two halves. Should the counter have wrapped while it was being read, we
11580     // need to try again.
11581     // ...
11582     // readLoop:
11583     // mfspr Rx,TBU # load from TBU
11584     // mfspr Ry,TB  # load from TB
11585     // mfspr Rz,TBU # load from TBU
11586     // cmpw crX,Rx,Rz # check if 'old'='new'
11587     // bne readLoop   # branch if they're not equal
11588     // ...
11589 
11590     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11591     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11592     DebugLoc dl = MI.getDebugLoc();
11593     F->insert(It, readMBB);
11594     F->insert(It, sinkMBB);
11595 
11596     // Transfer the remainder of BB and its successor edges to sinkMBB.
11597     sinkMBB->splice(sinkMBB->begin(), BB,
11598                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11599     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11600 
11601     BB->addSuccessor(readMBB);
11602     BB = readMBB;
11603 
11604     MachineRegisterInfo &RegInfo = F->getRegInfo();
11605     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11606     Register LoReg = MI.getOperand(0).getReg();
11607     Register HiReg = MI.getOperand(1).getReg();
11608 
11609     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11610     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11611     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11612 
11613     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11614 
11615     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11616         .addReg(HiReg)
11617         .addReg(ReadAgainReg);
11618     BuildMI(BB, dl, TII->get(PPC::BCC))
11619         .addImm(PPC::PRED_NE)
11620         .addReg(CmpReg)
11621         .addMBB(readMBB);
11622 
11623     BB->addSuccessor(readMBB);
11624     BB->addSuccessor(sinkMBB);
11625   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11626     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11627   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11628     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11629   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11630     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11631   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11632     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11633 
11634   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11635     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11636   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11637     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11638   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11639     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11640   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11641     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11642 
11643   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11644     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11645   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11646     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11647   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11648     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11649   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11650     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11651 
11652   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11653     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11654   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11655     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11656   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11657     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11658   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11659     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11660 
11661   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11662     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11663   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11664     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11665   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11666     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11667   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11668     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11669 
11670   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11671     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11672   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11673     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11674   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11675     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11676   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11677     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11678 
11679   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11680     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11681   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11682     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11683   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11684     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11685   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11686     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11687 
11688   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11689     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11690   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11691     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11692   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11693     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11694   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11695     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11696 
11697   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11698     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11699   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11700     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11701   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11702     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11703   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11704     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11705 
11706   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11707     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11708   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11709     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11710   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11711     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11712   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11713     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11714 
11715   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11716     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11717   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11718     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11719   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11720     BB = EmitAtomicBinary(MI, BB, 4, 0);
11721   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11722     BB = EmitAtomicBinary(MI, BB, 8, 0);
11723   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11724            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11725            (Subtarget.hasPartwordAtomics() &&
11726             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11727            (Subtarget.hasPartwordAtomics() &&
11728             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11729     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11730 
11731     auto LoadMnemonic = PPC::LDARX;
11732     auto StoreMnemonic = PPC::STDCX;
11733     switch (MI.getOpcode()) {
11734     default:
11735       llvm_unreachable("Compare and swap of unknown size");
11736     case PPC::ATOMIC_CMP_SWAP_I8:
11737       LoadMnemonic = PPC::LBARX;
11738       StoreMnemonic = PPC::STBCX;
11739       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11740       break;
11741     case PPC::ATOMIC_CMP_SWAP_I16:
11742       LoadMnemonic = PPC::LHARX;
11743       StoreMnemonic = PPC::STHCX;
11744       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11745       break;
11746     case PPC::ATOMIC_CMP_SWAP_I32:
11747       LoadMnemonic = PPC::LWARX;
11748       StoreMnemonic = PPC::STWCX;
11749       break;
11750     case PPC::ATOMIC_CMP_SWAP_I64:
11751       LoadMnemonic = PPC::LDARX;
11752       StoreMnemonic = PPC::STDCX;
11753       break;
11754     }
11755     Register dest = MI.getOperand(0).getReg();
11756     Register ptrA = MI.getOperand(1).getReg();
11757     Register ptrB = MI.getOperand(2).getReg();
11758     Register oldval = MI.getOperand(3).getReg();
11759     Register newval = MI.getOperand(4).getReg();
11760     DebugLoc dl = MI.getDebugLoc();
11761 
11762     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11763     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11764     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11765     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11766     F->insert(It, loop1MBB);
11767     F->insert(It, loop2MBB);
11768     F->insert(It, midMBB);
11769     F->insert(It, exitMBB);
11770     exitMBB->splice(exitMBB->begin(), BB,
11771                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11772     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11773 
11774     //  thisMBB:
11775     //   ...
11776     //   fallthrough --> loopMBB
11777     BB->addSuccessor(loop1MBB);
11778 
11779     // loop1MBB:
11780     //   l[bhwd]arx dest, ptr
11781     //   cmp[wd] dest, oldval
11782     //   bne- midMBB
11783     // loop2MBB:
11784     //   st[bhwd]cx. newval, ptr
11785     //   bne- loopMBB
11786     //   b exitBB
11787     // midMBB:
11788     //   st[bhwd]cx. dest, ptr
11789     // exitBB:
11790     BB = loop1MBB;
11791     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11792     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11793         .addReg(oldval)
11794         .addReg(dest);
11795     BuildMI(BB, dl, TII->get(PPC::BCC))
11796         .addImm(PPC::PRED_NE)
11797         .addReg(PPC::CR0)
11798         .addMBB(midMBB);
11799     BB->addSuccessor(loop2MBB);
11800     BB->addSuccessor(midMBB);
11801 
11802     BB = loop2MBB;
11803     BuildMI(BB, dl, TII->get(StoreMnemonic))
11804         .addReg(newval)
11805         .addReg(ptrA)
11806         .addReg(ptrB);
11807     BuildMI(BB, dl, TII->get(PPC::BCC))
11808         .addImm(PPC::PRED_NE)
11809         .addReg(PPC::CR0)
11810         .addMBB(loop1MBB);
11811     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11812     BB->addSuccessor(loop1MBB);
11813     BB->addSuccessor(exitMBB);
11814 
11815     BB = midMBB;
11816     BuildMI(BB, dl, TII->get(StoreMnemonic))
11817         .addReg(dest)
11818         .addReg(ptrA)
11819         .addReg(ptrB);
11820     BB->addSuccessor(exitMBB);
11821 
11822     //  exitMBB:
11823     //   ...
11824     BB = exitMBB;
11825   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11826              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11827     // We must use 64-bit registers for addresses when targeting 64-bit,
11828     // since we're actually doing arithmetic on them.  Other registers
11829     // can be 32-bit.
11830     bool is64bit = Subtarget.isPPC64();
11831     bool isLittleEndian = Subtarget.isLittleEndian();
11832     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11833 
11834     Register dest = MI.getOperand(0).getReg();
11835     Register ptrA = MI.getOperand(1).getReg();
11836     Register ptrB = MI.getOperand(2).getReg();
11837     Register oldval = MI.getOperand(3).getReg();
11838     Register newval = MI.getOperand(4).getReg();
11839     DebugLoc dl = MI.getDebugLoc();
11840 
11841     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11842     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11843     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11844     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11845     F->insert(It, loop1MBB);
11846     F->insert(It, loop2MBB);
11847     F->insert(It, midMBB);
11848     F->insert(It, exitMBB);
11849     exitMBB->splice(exitMBB->begin(), BB,
11850                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11851     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11852 
11853     MachineRegisterInfo &RegInfo = F->getRegInfo();
11854     const TargetRegisterClass *RC =
11855         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11856     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11857 
11858     Register PtrReg = RegInfo.createVirtualRegister(RC);
11859     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11860     Register ShiftReg =
11861         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11862     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11863     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11864     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11865     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11866     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11867     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11868     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11869     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11870     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11871     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11872     Register Ptr1Reg;
11873     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11874     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11875     //  thisMBB:
11876     //   ...
11877     //   fallthrough --> loopMBB
11878     BB->addSuccessor(loop1MBB);
11879 
11880     // The 4-byte load must be aligned, while a char or short may be
11881     // anywhere in the word.  Hence all this nasty bookkeeping code.
11882     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11883     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11884     //   xori shift, shift1, 24 [16]
11885     //   rlwinm ptr, ptr1, 0, 0, 29
11886     //   slw newval2, newval, shift
11887     //   slw oldval2, oldval,shift
11888     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11889     //   slw mask, mask2, shift
11890     //   and newval3, newval2, mask
11891     //   and oldval3, oldval2, mask
11892     // loop1MBB:
11893     //   lwarx tmpDest, ptr
11894     //   and tmp, tmpDest, mask
11895     //   cmpw tmp, oldval3
11896     //   bne- midMBB
11897     // loop2MBB:
11898     //   andc tmp2, tmpDest, mask
11899     //   or tmp4, tmp2, newval3
11900     //   stwcx. tmp4, ptr
11901     //   bne- loop1MBB
11902     //   b exitBB
11903     // midMBB:
11904     //   stwcx. tmpDest, ptr
11905     // exitBB:
11906     //   srw dest, tmpDest, shift
11907     if (ptrA != ZeroReg) {
11908       Ptr1Reg = RegInfo.createVirtualRegister(RC);
11909       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11910           .addReg(ptrA)
11911           .addReg(ptrB);
11912     } else {
11913       Ptr1Reg = ptrB;
11914     }
11915 
11916     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11917     // mode.
11918     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11919         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11920         .addImm(3)
11921         .addImm(27)
11922         .addImm(is8bit ? 28 : 27);
11923     if (!isLittleEndian)
11924       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11925           .addReg(Shift1Reg)
11926           .addImm(is8bit ? 24 : 16);
11927     if (is64bit)
11928       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11929           .addReg(Ptr1Reg)
11930           .addImm(0)
11931           .addImm(61);
11932     else
11933       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11934           .addReg(Ptr1Reg)
11935           .addImm(0)
11936           .addImm(0)
11937           .addImm(29);
11938     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11939         .addReg(newval)
11940         .addReg(ShiftReg);
11941     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11942         .addReg(oldval)
11943         .addReg(ShiftReg);
11944     if (is8bit)
11945       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11946     else {
11947       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11948       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11949           .addReg(Mask3Reg)
11950           .addImm(65535);
11951     }
11952     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11953         .addReg(Mask2Reg)
11954         .addReg(ShiftReg);
11955     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11956         .addReg(NewVal2Reg)
11957         .addReg(MaskReg);
11958     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11959         .addReg(OldVal2Reg)
11960         .addReg(MaskReg);
11961 
11962     BB = loop1MBB;
11963     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11964         .addReg(ZeroReg)
11965         .addReg(PtrReg);
11966     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11967         .addReg(TmpDestReg)
11968         .addReg(MaskReg);
11969     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11970         .addReg(TmpReg)
11971         .addReg(OldVal3Reg);
11972     BuildMI(BB, dl, TII->get(PPC::BCC))
11973         .addImm(PPC::PRED_NE)
11974         .addReg(PPC::CR0)
11975         .addMBB(midMBB);
11976     BB->addSuccessor(loop2MBB);
11977     BB->addSuccessor(midMBB);
11978 
11979     BB = loop2MBB;
11980     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11981         .addReg(TmpDestReg)
11982         .addReg(MaskReg);
11983     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11984         .addReg(Tmp2Reg)
11985         .addReg(NewVal3Reg);
11986     BuildMI(BB, dl, TII->get(PPC::STWCX))
11987         .addReg(Tmp4Reg)
11988         .addReg(ZeroReg)
11989         .addReg(PtrReg);
11990     BuildMI(BB, dl, TII->get(PPC::BCC))
11991         .addImm(PPC::PRED_NE)
11992         .addReg(PPC::CR0)
11993         .addMBB(loop1MBB);
11994     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11995     BB->addSuccessor(loop1MBB);
11996     BB->addSuccessor(exitMBB);
11997 
11998     BB = midMBB;
11999     BuildMI(BB, dl, TII->get(PPC::STWCX))
12000         .addReg(TmpDestReg)
12001         .addReg(ZeroReg)
12002         .addReg(PtrReg);
12003     BB->addSuccessor(exitMBB);
12004 
12005     //  exitMBB:
12006     //   ...
12007     BB = exitMBB;
12008     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12009         .addReg(TmpReg)
12010         .addReg(ShiftReg);
12011   } else if (MI.getOpcode() == PPC::FADDrtz) {
12012     // This pseudo performs an FADD with rounding mode temporarily forced
12013     // to round-to-zero.  We emit this via custom inserter since the FPSCR
12014     // is not modeled at the SelectionDAG level.
12015     Register Dest = MI.getOperand(0).getReg();
12016     Register Src1 = MI.getOperand(1).getReg();
12017     Register Src2 = MI.getOperand(2).getReg();
12018     DebugLoc dl = MI.getDebugLoc();
12019 
12020     MachineRegisterInfo &RegInfo = F->getRegInfo();
12021     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12022 
12023     // Save FPSCR value.
12024     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12025 
12026     // Set rounding mode to round-to-zero.
12027     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12028         .addImm(31)
12029         .addReg(PPC::RM, RegState::ImplicitDefine);
12030 
12031     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12032         .addImm(30)
12033         .addReg(PPC::RM, RegState::ImplicitDefine);
12034 
12035     // Perform addition.
12036     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
12037 
12038     // Restore FPSCR value.
12039     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12040   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12041              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12042              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12043              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12044     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12045                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12046                           ? PPC::ANDI8_rec
12047                           : PPC::ANDI_rec;
12048     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12049                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12050 
12051     MachineRegisterInfo &RegInfo = F->getRegInfo();
12052     Register Dest = RegInfo.createVirtualRegister(
12053         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12054 
12055     DebugLoc Dl = MI.getDebugLoc();
12056     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12057         .addReg(MI.getOperand(1).getReg())
12058         .addImm(1);
12059     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12060             MI.getOperand(0).getReg())
12061         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12062   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12063     DebugLoc Dl = MI.getDebugLoc();
12064     MachineRegisterInfo &RegInfo = F->getRegInfo();
12065     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12066     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12067     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12068             MI.getOperand(0).getReg())
12069         .addReg(CRReg);
12070   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12071     DebugLoc Dl = MI.getDebugLoc();
12072     unsigned Imm = MI.getOperand(1).getImm();
12073     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12074     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12075             MI.getOperand(0).getReg())
12076         .addReg(PPC::CR0EQ);
12077   } else if (MI.getOpcode() == PPC::SETRNDi) {
12078     DebugLoc dl = MI.getDebugLoc();
12079     Register OldFPSCRReg = MI.getOperand(0).getReg();
12080 
12081     // Save FPSCR value.
12082     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12083 
12084     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12085     // the following settings:
12086     //   00 Round to nearest
12087     //   01 Round to 0
12088     //   10 Round to +inf
12089     //   11 Round to -inf
12090 
12091     // When the operand is immediate, using the two least significant bits of
12092     // the immediate to set the bits 62:63 of FPSCR.
12093     unsigned Mode = MI.getOperand(1).getImm();
12094     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12095         .addImm(31)
12096         .addReg(PPC::RM, RegState::ImplicitDefine);
12097 
12098     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12099         .addImm(30)
12100         .addReg(PPC::RM, RegState::ImplicitDefine);
12101   } else if (MI.getOpcode() == PPC::SETRND) {
12102     DebugLoc dl = MI.getDebugLoc();
12103 
12104     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12105     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12106     // If the target doesn't have DirectMove, we should use stack to do the
12107     // conversion, because the target doesn't have the instructions like mtvsrd
12108     // or mfvsrd to do this conversion directly.
12109     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12110       if (Subtarget.hasDirectMove()) {
12111         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12112           .addReg(SrcReg);
12113       } else {
12114         // Use stack to do the register copy.
12115         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12116         MachineRegisterInfo &RegInfo = F->getRegInfo();
12117         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12118         if (RC == &PPC::F8RCRegClass) {
12119           // Copy register from F8RCRegClass to G8RCRegclass.
12120           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12121                  "Unsupported RegClass.");
12122 
12123           StoreOp = PPC::STFD;
12124           LoadOp = PPC::LD;
12125         } else {
12126           // Copy register from G8RCRegClass to F8RCRegclass.
12127           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12128                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12129                  "Unsupported RegClass.");
12130         }
12131 
12132         MachineFrameInfo &MFI = F->getFrameInfo();
12133         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12134 
12135         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12136             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12137             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12138             MFI.getObjectAlign(FrameIdx));
12139 
12140         // Store the SrcReg into the stack.
12141         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12142           .addReg(SrcReg)
12143           .addImm(0)
12144           .addFrameIndex(FrameIdx)
12145           .addMemOperand(MMOStore);
12146 
12147         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12148             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12149             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12150             MFI.getObjectAlign(FrameIdx));
12151 
12152         // Load from the stack where SrcReg is stored, and save to DestReg,
12153         // so we have done the RegClass conversion from RegClass::SrcReg to
12154         // RegClass::DestReg.
12155         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12156           .addImm(0)
12157           .addFrameIndex(FrameIdx)
12158           .addMemOperand(MMOLoad);
12159       }
12160     };
12161 
12162     Register OldFPSCRReg = MI.getOperand(0).getReg();
12163 
12164     // Save FPSCR value.
12165     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12166 
12167     // When the operand is gprc register, use two least significant bits of the
12168     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12169     //
12170     // copy OldFPSCRTmpReg, OldFPSCRReg
12171     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12172     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12173     // copy NewFPSCRReg, NewFPSCRTmpReg
12174     // mtfsf 255, NewFPSCRReg
12175     MachineOperand SrcOp = MI.getOperand(1);
12176     MachineRegisterInfo &RegInfo = F->getRegInfo();
12177     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12178 
12179     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12180 
12181     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12182     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12183 
12184     // The first operand of INSERT_SUBREG should be a register which has
12185     // subregisters, we only care about its RegClass, so we should use an
12186     // IMPLICIT_DEF register.
12187     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12188     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12189       .addReg(ImDefReg)
12190       .add(SrcOp)
12191       .addImm(1);
12192 
12193     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12194     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12195       .addReg(OldFPSCRTmpReg)
12196       .addReg(ExtSrcReg)
12197       .addImm(0)
12198       .addImm(62);
12199 
12200     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12201     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12202 
12203     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12204     // bits of FPSCR.
12205     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12206       .addImm(255)
12207       .addReg(NewFPSCRReg)
12208       .addImm(0)
12209       .addImm(0);
12210   } else if (MI.getOpcode() == PPC::SETFLM) {
12211     DebugLoc Dl = MI.getDebugLoc();
12212 
12213     // Result of setflm is previous FPSCR content, so we need to save it first.
12214     Register OldFPSCRReg = MI.getOperand(0).getReg();
12215     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12216 
12217     // Put bits in 32:63 to FPSCR.
12218     Register NewFPSCRReg = MI.getOperand(1).getReg();
12219     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12220         .addImm(255)
12221         .addReg(NewFPSCRReg)
12222         .addImm(0)
12223         .addImm(0);
12224   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12225              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12226     return emitProbedAlloca(MI, BB);
12227   } else {
12228     llvm_unreachable("Unexpected instr type to insert");
12229   }
12230 
12231   MI.eraseFromParent(); // The pseudo instruction is gone now.
12232   return BB;
12233 }
12234 
12235 //===----------------------------------------------------------------------===//
12236 // Target Optimization Hooks
12237 //===----------------------------------------------------------------------===//
12238 
12239 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12240   // For the estimates, convergence is quadratic, so we essentially double the
12241   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12242   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12243   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12244   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12245   if (VT.getScalarType() == MVT::f64)
12246     RefinementSteps++;
12247   return RefinementSteps;
12248 }
12249 
12250 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12251                                            int Enabled, int &RefinementSteps,
12252                                            bool &UseOneConstNR,
12253                                            bool Reciprocal) const {
12254   EVT VT = Operand.getValueType();
12255   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12256       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12257       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12258       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12259     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12260       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12261 
12262     // The Newton-Raphson computation with a single constant does not provide
12263     // enough accuracy on some CPUs.
12264     UseOneConstNR = !Subtarget.needsTwoConstNR();
12265     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12266   }
12267   return SDValue();
12268 }
12269 
12270 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12271                                             int Enabled,
12272                                             int &RefinementSteps) const {
12273   EVT VT = Operand.getValueType();
12274   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12275       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12276       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12277       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12278     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12279       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12280     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12281   }
12282   return SDValue();
12283 }
12284 
12285 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12286   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12287   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12288   // enabled for division), this functionality is redundant with the default
12289   // combiner logic (once the division -> reciprocal/multiply transformation
12290   // has taken place). As a result, this matters more for older cores than for
12291   // newer ones.
12292 
12293   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12294   // reciprocal if there are two or more FDIVs (for embedded cores with only
12295   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12296   switch (Subtarget.getCPUDirective()) {
12297   default:
12298     return 3;
12299   case PPC::DIR_440:
12300   case PPC::DIR_A2:
12301   case PPC::DIR_E500:
12302   case PPC::DIR_E500mc:
12303   case PPC::DIR_E5500:
12304     return 2;
12305   }
12306 }
12307 
12308 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12309 // collapsed, and so we need to look through chains of them.
12310 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12311                                      int64_t& Offset, SelectionDAG &DAG) {
12312   if (DAG.isBaseWithConstantOffset(Loc)) {
12313     Base = Loc.getOperand(0);
12314     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12315 
12316     // The base might itself be a base plus an offset, and if so, accumulate
12317     // that as well.
12318     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12319   }
12320 }
12321 
12322 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12323                             unsigned Bytes, int Dist,
12324                             SelectionDAG &DAG) {
12325   if (VT.getSizeInBits() / 8 != Bytes)
12326     return false;
12327 
12328   SDValue BaseLoc = Base->getBasePtr();
12329   if (Loc.getOpcode() == ISD::FrameIndex) {
12330     if (BaseLoc.getOpcode() != ISD::FrameIndex)
12331       return false;
12332     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12333     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
12334     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12335     int FS  = MFI.getObjectSize(FI);
12336     int BFS = MFI.getObjectSize(BFI);
12337     if (FS != BFS || FS != (int)Bytes) return false;
12338     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12339   }
12340 
12341   SDValue Base1 = Loc, Base2 = BaseLoc;
12342   int64_t Offset1 = 0, Offset2 = 0;
12343   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12344   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12345   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12346     return true;
12347 
12348   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12349   const GlobalValue *GV1 = nullptr;
12350   const GlobalValue *GV2 = nullptr;
12351   Offset1 = 0;
12352   Offset2 = 0;
12353   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12354   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12355   if (isGA1 && isGA2 && GV1 == GV2)
12356     return Offset1 == (Offset2 + Dist*Bytes);
12357   return false;
12358 }
12359 
12360 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12361 // not enforce equality of the chain operands.
12362 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12363                             unsigned Bytes, int Dist,
12364                             SelectionDAG &DAG) {
12365   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12366     EVT VT = LS->getMemoryVT();
12367     SDValue Loc = LS->getBasePtr();
12368     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12369   }
12370 
12371   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12372     EVT VT;
12373     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12374     default: return false;
12375     case Intrinsic::ppc_altivec_lvx:
12376     case Intrinsic::ppc_altivec_lvxl:
12377     case Intrinsic::ppc_vsx_lxvw4x:
12378     case Intrinsic::ppc_vsx_lxvw4x_be:
12379       VT = MVT::v4i32;
12380       break;
12381     case Intrinsic::ppc_vsx_lxvd2x:
12382     case Intrinsic::ppc_vsx_lxvd2x_be:
12383       VT = MVT::v2f64;
12384       break;
12385     case Intrinsic::ppc_altivec_lvebx:
12386       VT = MVT::i8;
12387       break;
12388     case Intrinsic::ppc_altivec_lvehx:
12389       VT = MVT::i16;
12390       break;
12391     case Intrinsic::ppc_altivec_lvewx:
12392       VT = MVT::i32;
12393       break;
12394     }
12395 
12396     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12397   }
12398 
12399   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12400     EVT VT;
12401     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12402     default: return false;
12403     case Intrinsic::ppc_altivec_stvx:
12404     case Intrinsic::ppc_altivec_stvxl:
12405     case Intrinsic::ppc_vsx_stxvw4x:
12406       VT = MVT::v4i32;
12407       break;
12408     case Intrinsic::ppc_vsx_stxvd2x:
12409       VT = MVT::v2f64;
12410       break;
12411     case Intrinsic::ppc_vsx_stxvw4x_be:
12412       VT = MVT::v4i32;
12413       break;
12414     case Intrinsic::ppc_vsx_stxvd2x_be:
12415       VT = MVT::v2f64;
12416       break;
12417     case Intrinsic::ppc_altivec_stvebx:
12418       VT = MVT::i8;
12419       break;
12420     case Intrinsic::ppc_altivec_stvehx:
12421       VT = MVT::i16;
12422       break;
12423     case Intrinsic::ppc_altivec_stvewx:
12424       VT = MVT::i32;
12425       break;
12426     }
12427 
12428     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12429   }
12430 
12431   return false;
12432 }
12433 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
12439 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12440   SDValue Chain = LD->getChain();
12441   EVT VT = LD->getMemoryVT();
12442 
12443   SmallSet<SDNode *, 16> LoadRoots;
12444   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12445   SmallSet<SDNode *, 16> Visited;
12446 
12447   // First, search up the chain, branching to follow all token-factor operands.
12448   // If we find a consecutive load, then we're done, otherwise, record all
12449   // nodes just above the top-level loads and token factors.
12450   while (!Queue.empty()) {
12451     SDNode *ChainNext = Queue.pop_back_val();
12452     if (!Visited.insert(ChainNext).second)
12453       continue;
12454 
12455     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12456       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12457         return true;
12458 
12459       if (!Visited.count(ChainLD->getChain().getNode()))
12460         Queue.push_back(ChainLD->getChain().getNode());
12461     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12462       for (const SDUse &O : ChainNext->ops())
12463         if (!Visited.count(O.getNode()))
12464           Queue.push_back(O.getNode());
12465     } else
12466       LoadRoots.insert(ChainNext);
12467   }
12468 
12469   // Second, search down the chain, starting from the top-level nodes recorded
12470   // in the first phase. These top-level nodes are the nodes just above all
12471   // loads and token factors. Starting with their uses, recursively look though
12472   // all loads (just the chain uses) and token factors to find a consecutive
12473   // load.
12474   Visited.clear();
12475   Queue.clear();
12476 
12477   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12478        IE = LoadRoots.end(); I != IE; ++I) {
12479     Queue.push_back(*I);
12480 
12481     while (!Queue.empty()) {
12482       SDNode *LoadRoot = Queue.pop_back_val();
12483       if (!Visited.insert(LoadRoot).second)
12484         continue;
12485 
12486       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12487         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12488           return true;
12489 
12490       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12491            UE = LoadRoot->use_end(); UI != UE; ++UI)
12492         if (((isa<MemSDNode>(*UI) &&
12493             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12494             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12495           Queue.push_back(*UI);
12496     }
12497   }
12498 
12499   return false;
12500 }
12501 
12502 /// This function is called when we have proved that a SETCC node can be replaced
12503 /// by subtraction (and other supporting instructions) so that the result of
12504 /// comparison is kept in a GPR instead of CR. This function is purely for
12505 /// codegen purposes and has some flags to guide the codegen process.
12506 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12507                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12508   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12509 
12510   // Zero extend the operands to the largest legal integer. Originally, they
12511   // must be of a strictly smaller size.
12512   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12513                          DAG.getConstant(Size, DL, MVT::i32));
12514   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12515                          DAG.getConstant(Size, DL, MVT::i32));
12516 
12517   // Swap if needed. Depends on the condition code.
12518   if (Swap)
12519     std::swap(Op0, Op1);
12520 
12521   // Subtract extended integers.
12522   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12523 
12524   // Move the sign bit to the least significant position and zero out the rest.
12525   // Now the least significant bit carries the result of original comparison.
12526   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12527                              DAG.getConstant(Size - 1, DL, MVT::i32));
12528   auto Final = Shifted;
12529 
12530   // Complement the result if needed. Based on the condition code.
12531   if (Complement)
12532     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12533                         DAG.getConstant(1, DL, MVT::i64));
12534 
12535   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12536 }
12537 
12538 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12539                                                   DAGCombinerInfo &DCI) const {
12540   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12541 
12542   SelectionDAG &DAG = DCI.DAG;
12543   SDLoc DL(N);
12544 
12545   // Size of integers being compared has a critical role in the following
12546   // analysis, so we prefer to do this when all types are legal.
12547   if (!DCI.isAfterLegalizeDAG())
12548     return SDValue();
12549 
12550   // If all users of SETCC extend its value to a legal integer type
12551   // then we replace SETCC with a subtraction
12552   for (SDNode::use_iterator UI = N->use_begin(),
12553        UE = N->use_end(); UI != UE; ++UI) {
12554     if (UI->getOpcode() != ISD::ZERO_EXTEND)
12555       return SDValue();
12556   }
12557 
12558   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12559   auto OpSize = N->getOperand(0).getValueSizeInBits();
12560 
12561   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12562 
12563   if (OpSize < Size) {
12564     switch (CC) {
12565     default: break;
12566     case ISD::SETULT:
12567       return generateEquivalentSub(N, Size, false, false, DL, DAG);
12568     case ISD::SETULE:
12569       return generateEquivalentSub(N, Size, true, true, DL, DAG);
12570     case ISD::SETUGT:
12571       return generateEquivalentSub(N, Size, false, true, DL, DAG);
12572     case ISD::SETUGE:
12573       return generateEquivalentSub(N, Size, true, false, DL, DAG);
12574     }
12575   }
12576 
12577   return SDValue();
12578 }
12579 
// Eliminate an i1 truncation (or the implicit truncation performed by a
// SETCC/SELECT_CC) by promoting the entire self-contained cluster of feeding
// logical operations (and/or/xor/select/select_cc) to operate directly on i1
// values, so the computation can stay in CR bits instead of GPRs.
// Returns the replacement value, or an empty SDValue if no combine applies.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  // Only GPR-sized (i32/i64) operands are handled.
  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    // The condition code is operand 2 of a SETCC and operand 4 of a
    // SELECT_CC.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // For a signed comparison, both operands must be fully sign-extended
      // from a single bit.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // For an unsigned comparison, all bits above bit 0 must be known zero;
      // otherwise, for a SETCC, try converting it to a subtract-based
      // equivalent instead.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);

      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // For comparisons, the second operand must satisfy the same constraint.
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs: leaves of the cluster (extensions from i1, or constants).
  // BinOps: worklist of interior operations still to visit.
  // PromOps: interior operations to be promoted to i1.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist from N's operand(s): direct i1 extensions and
  // constants are leaf inputs; anything else is an interior operation.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    // A truncate is unary; there is no second operand to inspect.
    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Apply the same self-containment check to the interior operations.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Wrap the to-be-promoted nodes in handles so the SDValues stay valid (and
  // track any replacements) across the ReplaceAllUsesOfValueWith calls below.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first promoted (value) operand: selects carry
    // their condition operand(s) first, which are not promoted.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
12860 
// Eliminate a zext/sext/aext whose feeding cluster of logical operations
// (and/or/xor/select/select_cc) is itself fed only by truncations, by
// promoting the cluster to operate on the extended type directly (i1 with
// CR bits, or i32->i64 on PPC64). A final mask (zext) or shift pair (sext)
// is emitted only when the inputs' high bits are not already as required.
// Returns the replacement value, or an empty SDValue if no combine applies.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // Only handle i1 sources when CR bits are in use, and i32 sources on PPC64.
  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  // Inputs: leaves of the cluster (truncations or constants).
  // BinOps: worklist of interior operations still to visit (seeded with N's
  // operand). PromOps: interior operations to be promoted.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  // Apply the same self-containment check to the interior operations.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Wrap the to-be-promoted nodes in handles so the SDValues stay valid (and
  // track any replacements) across the ReplaceAllUsesOfValueWith calls below.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    // C is the index of the first promoted (value) operand: selects carry
    // their condition operand(s) first, which are not promoted.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  // Sign extend via a shift-left/shift-right-algebraic pair.
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
13143 
13144 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13145                                         DAGCombinerInfo &DCI) const {
13146   assert(N->getOpcode() == ISD::SETCC &&
13147          "Should be called with a SETCC node");
13148 
13149   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13150   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13151     SDValue LHS = N->getOperand(0);
13152     SDValue RHS = N->getOperand(1);
13153 
13154     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13155     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13156         LHS.hasOneUse())
13157       std::swap(LHS, RHS);
13158 
13159     // x == 0-y --> x+y == 0
13160     // x != 0-y --> x+y != 0
13161     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13162         RHS.hasOneUse()) {
13163       SDLoc DL(N);
13164       SelectionDAG &DAG = DCI.DAG;
13165       EVT VT = N->getValueType(0);
13166       EVT OpVT = LHS.getValueType();
13167       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13168       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13169     }
13170   }
13171 
13172   return DAGCombineTruncBoolExt(N, DCI);
13173 }
13174 
13175 // Is this an extending load from an f32 to an f64?
13176 static bool isFPExtLoad(SDValue Op) {
13177   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13178     return LD->getExtensionType() == ISD::EXTLOAD &&
13179       Op.getValueType() == MVT::f64;
13180   return false;
13181 }
13182 
13183 /// Reduces the number of fp-to-int conversion when building a vector.
13184 ///
13185 /// If this vector is built out of floating to integer conversions,
13186 /// transform it to a vector built out of floating point values followed by a
13187 /// single floating to integer conversion of the vector.
13188 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
13189 /// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCSISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    // FCTIW[U]Z produce 32-bit results; FCTID[U]Z produce 64-bit results.
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    // Verify that every element is an MFVSR of the same conversion kind (and
    // detect whether all elements are identical, i.e. a splat).
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          // The constant-1 second operand marks the FP_ROUND as lossless:
          // the f64 input came from an extended f32 load.
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    // Pick the signed/unsigned conversion matching the scalar conversions.
    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    // Build one FP vector and convert it with a single vector conversion.
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}
13270 
13271 /// Reduce the number of loads when building a vector.
13272 ///
13273 /// Building a vector out of multiple loads can be converted to a load
13274 /// of the vector type if the loads are consecutive. If the loads are
13275 /// consecutive but in descending order, a shuffle is added at the end
13276 /// to reorder the vector.
13277 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13278   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13279          "Should be called with a BUILD_VECTOR node");
13280 
13281   SDLoc dl(N);
13282 
13283   // Return early for non byte-sized type, as they can't be consecutive.
13284   if (!N->getValueType(0).getVectorElementType().isByteSized())
13285     return SDValue();
13286 
13287   bool InputsAreConsecutiveLoads = true;
13288   bool InputsAreReverseConsecutive = true;
13289   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13290   SDValue FirstInput = N->getOperand(0);
13291   bool IsRoundOfExtLoad = false;
13292 
13293   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13294       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13295     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13296     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13297   }
13298   // Not a build vector of (possibly fp_rounded) loads.
13299   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13300       N->getNumOperands() == 1)
13301     return SDValue();
13302 
13303   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13304     // If any inputs are fp_round(extload), they all must be.
13305     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13306       return SDValue();
13307 
13308     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13309       N->getOperand(i);
13310     if (NextInput.getOpcode() != ISD::LOAD)
13311       return SDValue();
13312 
13313     SDValue PreviousInput =
13314       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13315     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13316     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13317 
13318     // If any inputs are fp_round(extload), they all must be.
13319     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13320       return SDValue();
13321 
13322     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13323       InputsAreConsecutiveLoads = false;
13324     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13325       InputsAreReverseConsecutive = false;
13326 
13327     // Exit early if the loads are neither consecutive nor reverse consecutive.
13328     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13329       return SDValue();
13330   }
13331 
13332   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13333          "The loads cannot be both consecutive and reverse consecutive.");
13334 
13335   SDValue FirstLoadOp =
13336     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13337   SDValue LastLoadOp =
13338     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13339                        N->getOperand(N->getNumOperands()-1);
13340 
13341   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13342   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13343   if (InputsAreConsecutiveLoads) {
13344     assert(LD1 && "Input needs to be a LoadSDNode.");
13345     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13346                        LD1->getBasePtr(), LD1->getPointerInfo(),
13347                        LD1->getAlignment());
13348   }
13349   if (InputsAreReverseConsecutive) {
13350     assert(LDL && "Input needs to be a LoadSDNode.");
13351     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13352                                LDL->getBasePtr(), LDL->getPointerInfo(),
13353                                LDL->getAlignment());
13354     SmallVector<int, 16> Ops;
13355     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13356       Ops.push_back(i);
13357 
13358     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13359                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13360   }
13361   return SDValue();
13362 }
13363 
13364 // This function adds the required vector_shuffle needed to get
13365 // the elements of the vector extract in the correct position
13366 // as specified by the CorrectElems encoding.
13367 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13368                                       SDValue Input, uint64_t Elems,
13369                                       uint64_t CorrectElems) {
13370   SDLoc dl(N);
13371 
13372   unsigned NumElems = Input.getValueType().getVectorNumElements();
13373   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13374 
13375   // Knowing the element indices being extracted from the original
13376   // vector and the order in which they're being inserted, just put
13377   // them at element indices required for the instruction.
13378   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13379     if (DAG.getDataLayout().isLittleEndian())
13380       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13381     else
13382       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13383     CorrectElems = CorrectElems >> 8;
13384     Elems = Elems >> 8;
13385   }
13386 
13387   SDValue Shuffle =
13388       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13389                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13390 
13391   EVT VT = N->getValueType(0);
13392   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13393 
13394   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13395                                Input.getValueType().getVectorElementType(),
13396                                VT.getVectorNumElements());
13397   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13398                      DAG.getValueType(ExtVT));
13399 }
13400 
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create a
// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  // Elems accumulates one byte per build_vector operand: the extract index
  // in the low nibble for LE, or shifted to the high nibble for BE, to
  // mirror the TargetElems encoding above.
  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  // Returns true if Op is a sign-extended EXTRACT_VECTOR_ELT (possibly via
  // an intermediate ANY_EXTEND) with a constant index, taken from the same
  // source vector as all previously accepted operands. Side effects: records
  // the source vector in Input and appends the index to Elems.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    // Only constant element indices can be matched against the encoding.
    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    // All extracts must come from a single source vector.
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    // Append this operand's index as the next byte of the encoding
    // (low nibble for LE, high nibble for BE).
    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts,
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  // Select the encoding by the sum of the element widths; each supported
  // combination (8+32, 8+64, 16+32, 16+64, 32+64) has a unique sum.
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  // Keep only the nibbles relevant to this endianness before comparing.
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
13499 
13500 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13501                                                  DAGCombinerInfo &DCI) const {
13502   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13503          "Should be called with a BUILD_VECTOR node");
13504 
13505   SelectionDAG &DAG = DCI.DAG;
13506   SDLoc dl(N);
13507 
13508   if (!Subtarget.hasVSX())
13509     return SDValue();
13510 
13511   // The target independent DAG combiner will leave a build_vector of
13512   // float-to-int conversions intact. We can generate MUCH better code for
13513   // a float-to-int conversion of a vector of floats.
13514   SDValue FirstInput = N->getOperand(0);
13515   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13516     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13517     if (Reduced)
13518       return Reduced;
13519   }
13520 
13521   // If we're building a vector out of consecutive loads, just load that
13522   // vector type.
13523   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13524   if (Reduced)
13525     return Reduced;
13526 
13527   // If we're building a vector out of extended elements from another vector
13528   // we have P9 vector integer extend instructions. The code assumes legal
13529   // input types (i.e. it can't handle things like v4i16) so do not run before
13530   // legalization.
13531   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13532     Reduced = combineBVOfVecSExt(N, DAG);
13533     if (Reduced)
13534       return Reduced;
13535   }
13536 
13537 
13538   if (N->getValueType(0) != MVT::v2f64)
13539     return SDValue();
13540 
13541   // Looking for:
13542   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
13543   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
13544       FirstInput.getOpcode() != ISD::UINT_TO_FP)
13545     return SDValue();
13546   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
13547       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
13548     return SDValue();
13549   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
13550     return SDValue();
13551 
13552   SDValue Ext1 = FirstInput.getOperand(0);
13553   SDValue Ext2 = N->getOperand(1).getOperand(0);
13554   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13555      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13556     return SDValue();
13557 
13558   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
13559   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
13560   if (!Ext1Op || !Ext2Op)
13561     return SDValue();
13562   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
13563       Ext1.getOperand(0) != Ext2.getOperand(0))
13564     return SDValue();
13565 
13566   int FirstElem = Ext1Op->getZExtValue();
13567   int SecondElem = Ext2Op->getZExtValue();
13568   int SubvecIdx;
13569   if (FirstElem == 0 && SecondElem == 1)
13570     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
13571   else if (FirstElem == 2 && SecondElem == 3)
13572     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
13573   else
13574     return SDValue();
13575 
13576   SDValue SrcVec = Ext1.getOperand(0);
13577   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
13578     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
13579   return DAG.getNode(NodeType, dl, MVT::v2f64,
13580                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
13581 }
13582 
// Combine int -> FP conversion nodes. Two patterns are handled:
//  * On P9, (sint/uint_to_fp (i8/i16 load)) is turned into an LXSIZX load
//    straight into a VSR (plus a VEXTS for the signed case).
//  * (sint/uint_to_fp (fp_to_sint/uint f)) is done entirely with FP register
//    conversions, removing the need for a store/load round trip.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // These transformations rely on hardware FP and 64-bit support.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  // Only integer sources wider than i1 and no wider than i64 are handled.
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // Case 1: conversion of a sub-word (i8/i16) load on Power9.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // LXSIZX takes the width of the load, in bytes, as an operand.
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value are undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // Case 2: if we're converting from a float, to an int, and back to a
  // float again, then we don't need the store/load pair at all.
  // (FP_TO_UINT is only safe to fold this way with FPCVT.)
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT the conversion produced f64; round the result to f32.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
13686 
13687 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
13688 // builtins) into loads with swaps.
13689 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
13690                                               DAGCombinerInfo &DCI) const {
13691   SelectionDAG &DAG = DCI.DAG;
13692   SDLoc dl(N);
13693   SDValue Chain;
13694   SDValue Base;
13695   MachineMemOperand *MMO;
13696 
13697   switch (N->getOpcode()) {
13698   default:
13699     llvm_unreachable("Unexpected opcode for little endian VSX load");
13700   case ISD::LOAD: {
13701     LoadSDNode *LD = cast<LoadSDNode>(N);
13702     Chain = LD->getChain();
13703     Base = LD->getBasePtr();
13704     MMO = LD->getMemOperand();
13705     // If the MMO suggests this isn't a load of a full vector, leave
13706     // things alone.  For a built-in, we have to make the change for
13707     // correctness, so if there is a size problem that will be a bug.
13708     if (MMO->getSize() < 16)
13709       return SDValue();
13710     break;
13711   }
13712   case ISD::INTRINSIC_W_CHAIN: {
13713     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13714     Chain = Intrin->getChain();
13715     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13716     // us what we want. Get operand 2 instead.
13717     Base = Intrin->getOperand(2);
13718     MMO = Intrin->getMemOperand();
13719     break;
13720   }
13721   }
13722 
13723   MVT VecTy = N->getValueType(0).getSimpleVT();
13724 
13725   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13726   // aligned and the type is a vector with elements up to 4 bytes
13727   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13728       VecTy.getScalarSizeInBits() <= 32) {
13729     return SDValue();
13730   }
13731 
13732   SDValue LoadOps[] = { Chain, Base };
13733   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13734                                          DAG.getVTList(MVT::v2f64, MVT::Other),
13735                                          LoadOps, MVT::v2f64, MMO);
13736 
13737   DCI.AddToWorklist(Load.getNode());
13738   Chain = Load.getValue(1);
13739   SDValue Swap = DAG.getNode(
13740       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13741   DCI.AddToWorklist(Swap.getNode());
13742 
13743   // Add a bitcast if the resulting load type doesn't match v2f64.
13744   if (VecTy != MVT::v2f64) {
13745     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13746     DCI.AddToWorklist(N.getNode());
13747     // Package {bitcast value, swap's chain} to match Load's shape.
13748     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13749                        N, Swap.getValue(1));
13750   }
13751 
13752   return Swap;
13753 }
13754 
13755 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13756 // builtins) into stores with swaps.
13757 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13758                                                DAGCombinerInfo &DCI) const {
13759   SelectionDAG &DAG = DCI.DAG;
13760   SDLoc dl(N);
13761   SDValue Chain;
13762   SDValue Base;
13763   unsigned SrcOpnd;
13764   MachineMemOperand *MMO;
13765 
13766   switch (N->getOpcode()) {
13767   default:
13768     llvm_unreachable("Unexpected opcode for little endian VSX store");
13769   case ISD::STORE: {
13770     StoreSDNode *ST = cast<StoreSDNode>(N);
13771     Chain = ST->getChain();
13772     Base = ST->getBasePtr();
13773     MMO = ST->getMemOperand();
13774     SrcOpnd = 1;
13775     // If the MMO suggests this isn't a store of a full vector, leave
13776     // things alone.  For a built-in, we have to make the change for
13777     // correctness, so if there is a size problem that will be a bug.
13778     if (MMO->getSize() < 16)
13779       return SDValue();
13780     break;
13781   }
13782   case ISD::INTRINSIC_VOID: {
13783     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13784     Chain = Intrin->getChain();
13785     // Intrin->getBasePtr() oddly does not get what we want.
13786     Base = Intrin->getOperand(3);
13787     MMO = Intrin->getMemOperand();
13788     SrcOpnd = 2;
13789     break;
13790   }
13791   }
13792 
13793   SDValue Src = N->getOperand(SrcOpnd);
13794   MVT VecTy = Src.getValueType().getSimpleVT();
13795 
13796   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
13797   // aligned and the type is a vector with elements up to 4 bytes
13798   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13799       VecTy.getScalarSizeInBits() <= 32) {
13800     return SDValue();
13801   }
13802 
13803   // All stores are done as v2f64 and possible bit cast.
13804   if (VecTy != MVT::v2f64) {
13805     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13806     DCI.AddToWorklist(Src.getNode());
13807   }
13808 
13809   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13810                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13811   DCI.AddToWorklist(Swap.getNode());
13812   Chain = Swap.getValue(1);
13813   SDValue StoreOps[] = { Chain, Swap, Base };
13814   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13815                                           DAG.getVTList(MVT::Other),
13816                                           StoreOps, VecTy, MMO);
13817   DCI.AddToWorklist(Store.getNode());
13818   return Store;
13819 }
13820 
// Handle DAG combine for STORE (FP_TO_INT F).
// Replaces store(fp_to_[su]int f) with a conversion done in a VSR followed
// by a direct store of the converted value (ST_VSR_SCAL_INT).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  // Opcode of the conversion feeding the store (operand 1 of the store).
  unsigned Opcode = N->getOperand(1).getOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // Val: the FP value being converted; Op1VT: the integer type stored;
  // ResVT: the FP source type.
  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  // Floating point types smaller than 32 bits are not legal on Power.
  if (ResVT.getScalarSizeInBits() < 32)
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // Bail on ppc_fp128 sources, pre-P8 subtargets, and truncating stores.
  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Extend f32 values to f64
  if (ResVT.getScalarSizeInBits() == 32) {
    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
    DCI.AddToWorklist(Val.getNode());
  }

  // Set signed or unsigned conversion opcode.
  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
                          PPCISD::FP_TO_SINT_IN_VSR :
                          PPCISD::FP_TO_UINT_IN_VSR;

  Val = DAG.getNode(ConvOpcode,
                    dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
  DCI.AddToWorklist(Val.getNode());

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Operands: chain, converted value, base pointer, byte width, stored type.
  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
                    DAG.getIntPtrConstant(ByteSize, dl, false),
                    DAG.getValueType(Op1VT) };

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  DCI.AddToWorklist(Val.getNode());
  return Val;
}
13878 
13879 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
13880   // Check that the source of the element keeps flipping
13881   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
13882   bool PrevElemFromFirstVec = Mask[0] < NumElts;
13883   for (int i = 1, e = Mask.size(); i < e; i++) {
13884     if (PrevElemFromFirstVec && Mask[i] < NumElts)
13885       return false;
13886     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
13887       return false;
13888     PrevElemFromFirstVec = !PrevElemFromFirstVec;
13889   }
13890   return true;
13891 }
13892 
13893 static bool isSplatBV(SDValue Op) {
13894   if (Op.getOpcode() != ISD::BUILD_VECTOR)
13895     return false;
13896   SDValue FirstOp;
13897 
13898   // Find first non-undef input.
13899   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
13900     FirstOp = Op.getOperand(i);
13901     if (!FirstOp.isUndef())
13902       break;
13903   }
13904 
13905   // All inputs are undef or the same as the first non-undef input.
13906   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
13907     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
13908       return false;
13909   return true;
13910 }
13911 
13912 static SDValue isScalarToVec(SDValue Op) {
13913   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13914     return Op;
13915   if (Op.getOpcode() != ISD::BITCAST)
13916     return SDValue();
13917   Op = Op.getOperand(0);
13918   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13919     return Op;
13920   return SDValue();
13921 }
13922 
13923 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
13924                                             int LHSMaxIdx, int RHSMinIdx,
13925                                             int RHSMaxIdx, int HalfVec) {
13926   for (int i = 0, e = ShuffV.size(); i < e; i++) {
13927     int Idx = ShuffV[i];
13928     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
13929       ShuffV[i] += HalfVec;
13930   }
13931   return;
13932 }
13933 
13934 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
13935 // the original is:
13936 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
13937 // In such a case, just change the shuffle mask to extract the element
13938 // from the permuted index.
13939 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
13940   SDLoc dl(OrigSToV);
13941   EVT VT = OrigSToV.getValueType();
13942   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13943          "Expecting a SCALAR_TO_VECTOR here");
13944   SDValue Input = OrigSToV.getOperand(0);
13945 
13946   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13947     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
13948     SDValue OrigVector = Input.getOperand(0);
13949 
13950     // Can't handle non-const element indices or different vector types
13951     // for the input to the extract and the output of the scalar_to_vector.
13952     if (Idx && VT == OrigVector.getValueType()) {
13953       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
13954       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
13955       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
13956     }
13957   }
13958   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
13959                      OrigSToV.getOperand(0));
13960 }
13961 
13962 // On little endian subtargets, combine shuffles such as:
13963 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
13964 // into:
13965 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
13966 // because the latter can be matched to a single instruction merge.
13967 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
13968 // to put the value into element zero. Adjust the shuffle mask so that the
13969 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
13970 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
13971                                                 SelectionDAG &DAG) const {
13972   SDValue LHS = SVN->getOperand(0);
13973   SDValue RHS = SVN->getOperand(1);
13974   auto Mask = SVN->getMask();
13975   int NumElts = LHS.getValueType().getVectorNumElements();
13976   SDValue Res(SVN, 0);
13977   SDLoc dl(SVN);
13978 
13979   // None of these combines are useful on big endian systems since the ISA
13980   // already has a big endian bias.
13981   if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
13982     return Res;
13983 
13984   // If this is not a shuffle of a shuffle and the first element comes from
13985   // the second vector, canonicalize to the commuted form. This will make it
13986   // more likely to match one of the single instruction patterns.
13987   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
13988       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
13989     std::swap(LHS, RHS);
13990     Res = DAG.getCommutedVectorShuffle(*SVN);
13991     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
13992   }
13993 
13994   // Adjust the shuffle mask if either input vector comes from a
13995   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
13996   // form (to prevent the need for a swap).
13997   SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
13998   SDValue SToVLHS = isScalarToVec(LHS);
13999   SDValue SToVRHS = isScalarToVec(RHS);
14000   if (SToVLHS || SToVRHS) {
14001     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14002                             : SToVRHS.getValueType().getVectorNumElements();
14003     int NumEltsOut = ShuffV.size();
14004 
14005     // Initially assume that neither input is permuted. These will be adjusted
14006     // accordingly if either input is.
14007     int LHSMaxIdx = -1;
14008     int RHSMinIdx = -1;
14009     int RHSMaxIdx = -1;
14010     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14011 
14012     // Get the permuted scalar to vector nodes for the source(s) that come from
14013     // ISD::SCALAR_TO_VECTOR.
14014     if (SToVLHS) {
14015       // Set up the values for the shuffle vector fixup.
14016       LHSMaxIdx = NumEltsOut / NumEltsIn;
14017       SToVLHS = getSToVPermuted(SToVLHS, DAG);
14018       if (SToVLHS.getValueType() != LHS.getValueType())
14019         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14020       LHS = SToVLHS;
14021     }
14022     if (SToVRHS) {
14023       RHSMinIdx = NumEltsOut;
14024       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14025       SToVRHS = getSToVPermuted(SToVRHS, DAG);
14026       if (SToVRHS.getValueType() != RHS.getValueType())
14027         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14028       RHS = SToVRHS;
14029     }
14030 
14031     // Fix up the shuffle mask to reflect where the desired element actually is.
14032     // The minimum and maximum indices that correspond to element zero for both
14033     // the LHS and RHS are computed and will control which shuffle mask entries
14034     // are to be changed. For example, if the RHS is permuted, any shuffle mask
14035     // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14036     // HalfVec to refer to the corresponding element in the permuted vector.
14037     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14038                                     HalfVec);
14039     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14040 
14041     // We may have simplified away the shuffle. We won't be able to do anything
14042     // further with it here.
14043     if (!isa<ShuffleVectorSDNode>(Res))
14044       return Res;
14045     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14046   }
14047 
14048   // The common case after we commuted the shuffle is that the RHS is a splat
14049   // and we have elements coming in from the splat at indices that are not
14050   // conducive to using a merge.
14051   // Example:
14052   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14053   if (!isSplatBV(RHS))
14054     return Res;
14055 
14056   // We are looking for a mask such that all even elements are from
14057   // one vector and all odd elements from the other.
14058   if (!isAlternatingShuffMask(Mask, NumElts))
14059     return Res;
14060 
14061   // Adjust the mask so we are pulling in the same index from the splat
14062   // as the index from the interesting vector in consecutive elements.
14063   // Example (even elements from first vector):
14064   // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14065   if (Mask[0] < NumElts)
14066     for (int i = 1, e = Mask.size(); i < e; i += 2)
14067       ShuffV[i] = (ShuffV[i - 1] + NumElts);
14068   // Example (odd elements from first vector):
14069   // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14070   else
14071     for (int i = 0, e = Mask.size(); i < e; i += 2)
14072       ShuffV[i] = (ShuffV[i + 1] + NumElts);
14073 
14074   // If the RHS has undefs, we need to remove them since we may have created
14075   // a shuffle that adds those instead of the splat value.
14076   SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14077   RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14078 
14079   Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14080   return Res;
14081 }
14082 
14083 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14084                                                 LSBaseSDNode *LSBase,
14085                                                 DAGCombinerInfo &DCI) const {
14086   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14087         "Not a reverse memop pattern!");
14088 
14089   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14090     auto Mask = SVN->getMask();
14091     int i = 0;
14092     auto I = Mask.rbegin();
14093     auto E = Mask.rend();
14094 
14095     for (; I != E; ++I) {
14096       if (*I != i)
14097         return false;
14098       i++;
14099     }
14100     return true;
14101   };
14102 
14103   SelectionDAG &DAG = DCI.DAG;
14104   EVT VT = SVN->getValueType(0);
14105 
14106   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14107     return SDValue();
14108 
14109   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14110   // See comment in PPCVSXSwapRemoval.cpp.
14111   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14112   if (!Subtarget.hasP9Vector())
14113     return SDValue();
14114 
14115   if(!IsElementReverse(SVN))
14116     return SDValue();
14117 
14118   if (LSBase->getOpcode() == ISD::LOAD) {
14119     SDLoc dl(SVN);
14120     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14121     return DAG.getMemIntrinsicNode(
14122         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14123         LSBase->getMemoryVT(), LSBase->getMemOperand());
14124   }
14125 
14126   if (LSBase->getOpcode() == ISD::STORE) {
14127     SDLoc dl(LSBase);
14128     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14129                           LSBase->getBasePtr()};
14130     return DAG.getMemIntrinsicNode(
14131         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14132         LSBase->getMemoryVT(), LSBase->getMemOperand());
14133   }
14134 
14135   llvm_unreachable("Expected a load or store node here");
14136 }
14137 
14138 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14139                                              DAGCombinerInfo &DCI) const {
14140   SelectionDAG &DAG = DCI.DAG;
14141   SDLoc dl(N);
14142   switch (N->getOpcode()) {
14143   default: break;
14144   case ISD::ADD:
14145     return combineADD(N, DCI);
14146   case ISD::SHL:
14147     return combineSHL(N, DCI);
14148   case ISD::SRA:
14149     return combineSRA(N, DCI);
14150   case ISD::SRL:
14151     return combineSRL(N, DCI);
14152   case ISD::MUL:
14153     return combineMUL(N, DCI);
14154   case ISD::FMA:
14155   case PPCISD::FNMSUB:
14156     return combineFMALike(N, DCI);
14157   case PPCISD::SHL:
14158     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14159         return N->getOperand(0);
14160     break;
14161   case PPCISD::SRL:
14162     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14163         return N->getOperand(0);
14164     break;
14165   case PPCISD::SRA:
14166     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14167       if (C->isNullValue() ||   //  0 >>s V -> 0.
14168           C->isAllOnesValue())    // -1 >>s V -> -1.
14169         return N->getOperand(0);
14170     }
14171     break;
14172   case ISD::SIGN_EXTEND:
14173   case ISD::ZERO_EXTEND:
14174   case ISD::ANY_EXTEND:
14175     return DAGCombineExtBoolTrunc(N, DCI);
14176   case ISD::TRUNCATE:
14177     return combineTRUNCATE(N, DCI);
14178   case ISD::SETCC:
14179     if (SDValue CSCC = combineSetCC(N, DCI))
14180       return CSCC;
14181     LLVM_FALLTHROUGH;
14182   case ISD::SELECT_CC:
14183     return DAGCombineTruncBoolExt(N, DCI);
14184   case ISD::SINT_TO_FP:
14185   case ISD::UINT_TO_FP:
14186     return combineFPToIntToFP(N, DCI);
14187   case ISD::VECTOR_SHUFFLE:
14188     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14189       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14190       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14191     }
14192     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14193   case ISD::STORE: {
14194 
14195     EVT Op1VT = N->getOperand(1).getValueType();
14196     unsigned Opcode = N->getOperand(1).getOpcode();
14197 
14198     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14199       SDValue Val= combineStoreFPToInt(N, DCI);
14200       if (Val)
14201         return Val;
14202     }
14203 
14204     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14205       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14206       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14207       if (Val)
14208         return Val;
14209     }
14210 
14211     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14212     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14213         N->getOperand(1).getNode()->hasOneUse() &&
14214         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14215          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14216 
      // STBRX can only handle simple types, and it makes no sense to store
      // fewer than two bytes in byte-reversed order.
14219       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14220       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14221         break;
14222 
14223       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14224       // Do an any-extend to 32-bits if this is a half-word input.
14225       if (BSwapOp.getValueType() == MVT::i16)
14226         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14227 
      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted right before STBRX.
14230       if (Op1VT.bitsGT(mVT)) {
14231         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14232         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14233                               DAG.getConstant(Shift, dl, MVT::i32));
14234         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14235         if (Op1VT == MVT::i64)
14236           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14237       }
14238 
14239       SDValue Ops[] = {
14240         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14241       };
14242       return
14243         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14244                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14245                                 cast<StoreSDNode>(N)->getMemOperand());
14246     }
14247 
14248     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14249     // So it can increase the chance of CSE constant construction.
14250     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14251         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
14253       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14254       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14255                                     MemVT.getSizeInBits());
14256       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14257 
14258       // DAG.getTruncStore() can't be used here because it doesn't accept
14259       // the general (base + offset) addressing mode.
14260       // So we use UpdateNodeOperands and setTruncatingStore instead.
14261       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14262                              N->getOperand(3));
14263       cast<StoreSDNode>(N)->setTruncatingStore(true);
14264       return SDValue(N, 0);
14265     }
14266 
14267     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14268     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14269     if (Op1VT.isSimple()) {
14270       MVT StoreVT = Op1VT.getSimpleVT();
14271       if (Subtarget.needsSwapsForVSXMemOps() &&
14272           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14273            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14274         return expandVSXStoreForLE(N, DCI);
14275     }
14276     break;
14277   }
14278   case ISD::LOAD: {
14279     LoadSDNode *LD = cast<LoadSDNode>(N);
14280     EVT VT = LD->getValueType(0);
14281 
14282     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14283     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14284     if (VT.isSimple()) {
14285       MVT LoadVT = VT.getSimpleVT();
14286       if (Subtarget.needsSwapsForVSXMemOps() &&
14287           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14288            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14289         return expandVSXLoadForLE(N, DCI);
14290     }
14291 
14292     // We sometimes end up with a 64-bit integer load, from which we extract
14293     // two single-precision floating-point numbers. This happens with
14294     // std::complex<float>, and other similar structures, because of the way we
14295     // canonicalize structure copies. However, if we lack direct moves,
14296     // then the final bitcasts from the extracted integer values to the
14297     // floating-point numbers turn into store/load pairs. Even with direct moves,
14298     // just loading the two floating-point numbers is likely better.
14299     auto ReplaceTwoFloatLoad = [&]() {
14300       if (VT != MVT::i64)
14301         return false;
14302 
14303       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14304           LD->isVolatile())
14305         return false;
14306 
14307       //  We're looking for a sequence like this:
14308       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14309       //      t16: i64 = srl t13, Constant:i32<32>
14310       //    t17: i32 = truncate t16
14311       //  t18: f32 = bitcast t17
14312       //    t19: i32 = truncate t13
14313       //  t20: f32 = bitcast t19
14314 
14315       if (!LD->hasNUsesOfValue(2, 0))
14316         return false;
14317 
14318       auto UI = LD->use_begin();
14319       while (UI.getUse().getResNo() != 0) ++UI;
14320       SDNode *Trunc = *UI++;
14321       while (UI.getUse().getResNo() != 0) ++UI;
14322       SDNode *RightShift = *UI;
14323       if (Trunc->getOpcode() != ISD::TRUNCATE)
14324         std::swap(Trunc, RightShift);
14325 
14326       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14327           Trunc->getValueType(0) != MVT::i32 ||
14328           !Trunc->hasOneUse())
14329         return false;
14330       if (RightShift->getOpcode() != ISD::SRL ||
14331           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14332           RightShift->getConstantOperandVal(1) != 32 ||
14333           !RightShift->hasOneUse())
14334         return false;
14335 
14336       SDNode *Trunc2 = *RightShift->use_begin();
14337       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14338           Trunc2->getValueType(0) != MVT::i32 ||
14339           !Trunc2->hasOneUse())
14340         return false;
14341 
14342       SDNode *Bitcast = *Trunc->use_begin();
14343       SDNode *Bitcast2 = *Trunc2->use_begin();
14344 
14345       if (Bitcast->getOpcode() != ISD::BITCAST ||
14346           Bitcast->getValueType(0) != MVT::f32)
14347         return false;
14348       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14349           Bitcast2->getValueType(0) != MVT::f32)
14350         return false;
14351 
14352       if (Subtarget.isLittleEndian())
14353         std::swap(Bitcast, Bitcast2);
14354 
14355       // Bitcast has the second float (in memory-layout order) and Bitcast2
14356       // has the first one.
14357 
14358       SDValue BasePtr = LD->getBasePtr();
14359       if (LD->isIndexed()) {
14360         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14361                "Non-pre-inc AM on PPC?");
14362         BasePtr =
14363           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14364                       LD->getOffset());
14365       }
14366 
14367       auto MMOFlags =
14368           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14369       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14370                                       LD->getPointerInfo(), LD->getAlignment(),
14371                                       MMOFlags, LD->getAAInfo());
14372       SDValue AddPtr =
14373         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14374                     BasePtr, DAG.getIntPtrConstant(4, dl));
14375       SDValue FloatLoad2 = DAG.getLoad(
14376           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14377           LD->getPointerInfo().getWithOffset(4),
14378           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14379 
14380       if (LD->isIndexed()) {
14381         // Note that DAGCombine should re-form any pre-increment load(s) from
14382         // what is produced here if that makes sense.
14383         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14384       }
14385 
14386       DCI.CombineTo(Bitcast2, FloatLoad);
14387       DCI.CombineTo(Bitcast, FloatLoad2);
14388 
14389       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14390                                     SDValue(FloatLoad2.getNode(), 1));
14391       return true;
14392     };
14393 
14394     if (ReplaceTwoFloatLoad())
14395       return SDValue(N, 0);
14396 
14397     EVT MemVT = LD->getMemoryVT();
14398     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14399     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14400     if (LD->isUnindexed() && VT.isVector() &&
14401         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14402           // P8 and later hardware should just use LOAD.
14403           !Subtarget.hasP8Vector() &&
14404           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14405            VT == MVT::v4f32))) &&
14406         LD->getAlign() < ABIAlignment) {
14407       // This is a type-legal unaligned Altivec load.
14408       SDValue Chain = LD->getChain();
14409       SDValue Ptr = LD->getBasePtr();
14410       bool isLittleEndian = Subtarget.isLittleEndian();
14411 
14412       // This implements the loading of unaligned vectors as described in
14413       // the venerable Apple Velocity Engine overview. Specifically:
14414       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14415       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14416       //
14417       // The general idea is to expand a sequence of one or more unaligned
14418       // loads into an alignment-based permutation-control instruction (lvsl
14419       // or lvsr), a series of regular vector loads (which always truncate
14420       // their input address to an aligned address), and a series of
14421       // permutations.  The results of these permutations are the requested
14422       // loaded values.  The trick is that the last "extra" load is not taken
14423       // from the address you might suspect (sizeof(vector) bytes after the
14424       // last requested load), but rather sizeof(vector) - 1 bytes after the
14425       // last requested vector. The point of this is to avoid a page fault if
14426       // the base address happened to be aligned. This works because if the
14427       // base address is aligned, then adding less than a full vector length
14428       // will cause the last vector in the sequence to be (re)loaded.
14429       // Otherwise, the next vector will be fetched as you might suspect was
14430       // necessary.
14431 
14432       // We might be able to reuse the permutation generation from
14433       // a different base address offset from this one by an aligned amount.
14434       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14435       // optimization later.
14436       Intrinsic::ID Intr, IntrLD, IntrPerm;
14437       MVT PermCntlTy, PermTy, LDTy;
14438       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14439                             : Intrinsic::ppc_altivec_lvsl;
14440       IntrLD = Intrinsic::ppc_altivec_lvx;
14441       IntrPerm = Intrinsic::ppc_altivec_vperm;
14442       PermCntlTy = MVT::v16i8;
14443       PermTy = MVT::v4i32;
14444       LDTy = MVT::v4i32;
14445 
14446       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14447 
14448       // Create the new MMO for the new base load. It is like the original MMO,
14449       // but represents an area in memory almost twice the vector size centered
14450       // on the original address. If the address is unaligned, we might start
14451       // reading up to (sizeof(vector)-1) bytes below the address of the
14452       // original unaligned load.
14453       MachineFunction &MF = DAG.getMachineFunction();
14454       MachineMemOperand *BaseMMO =
14455         MF.getMachineMemOperand(LD->getMemOperand(),
14456                                 -(long)MemVT.getStoreSize()+1,
14457                                 2*MemVT.getStoreSize()-1);
14458 
14459       // Create the new base load.
14460       SDValue LDXIntID =
14461           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14462       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14463       SDValue BaseLoad =
14464         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14465                                 DAG.getVTList(PermTy, MVT::Other),
14466                                 BaseLoadOps, LDTy, BaseMMO);
14467 
14468       // Note that the value of IncOffset (which is provided to the next
14469       // load's pointer info offset value, and thus used to calculate the
14470       // alignment), and the value of IncValue (which is actually used to
14471       // increment the pointer value) are different! This is because we
14472       // require the next load to appear to be aligned, even though it
14473       // is actually offset from the base pointer by a lesser amount.
14474       int IncOffset = VT.getSizeInBits() / 8;
14475       int IncValue = IncOffset;
14476 
14477       // Walk (both up and down) the chain looking for another load at the real
14478       // (aligned) offset (the alignment of the other load does not matter in
14479       // this case). If found, then do not use the offset reduction trick, as
14480       // that will prevent the loads from being later combined (as they would
14481       // otherwise be duplicates).
14482       if (!findConsecutiveLoad(LD, DAG))
14483         --IncValue;
14484 
14485       SDValue Increment =
14486           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14487       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14488 
14489       MachineMemOperand *ExtraMMO =
14490         MF.getMachineMemOperand(LD->getMemOperand(),
14491                                 1, 2*MemVT.getStoreSize()-1);
14492       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14493       SDValue ExtraLoad =
14494         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14495                                 DAG.getVTList(PermTy, MVT::Other),
14496                                 ExtraLoadOps, LDTy, ExtraMMO);
14497 
14498       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14499         BaseLoad.getValue(1), ExtraLoad.getValue(1));
14500 
14501       // Because vperm has a big-endian bias, we must reverse the order
14502       // of the input vectors and complement the permute control vector
14503       // when generating little endian code.  We have already handled the
14504       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14505       // and ExtraLoad here.
14506       SDValue Perm;
14507       if (isLittleEndian)
14508         Perm = BuildIntrinsicOp(IntrPerm,
14509                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14510       else
14511         Perm = BuildIntrinsicOp(IntrPerm,
14512                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14513 
14514       if (VT != PermTy)
14515         Perm = Subtarget.hasAltivec()
14516                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14517                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14518                                  DAG.getTargetConstant(1, dl, MVT::i64));
14519                                // second argument is 1 because this rounding
14520                                // is always exact.
14521 
14522       // The output of the permutation is our loaded result, the TokenFactor is
14523       // our new chain.
14524       DCI.CombineTo(N, Perm, TF);
14525       return SDValue(N, 0);
14526     }
14527     }
14528     break;
14529     case ISD::INTRINSIC_WO_CHAIN: {
14530       bool isLittleEndian = Subtarget.isLittleEndian();
14531       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14532       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14533                                            : Intrinsic::ppc_altivec_lvsl);
14534       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14535         SDValue Add = N->getOperand(1);
14536 
14537         int Bits = 4 /* 16 byte alignment */;
14538 
14539         if (DAG.MaskedValueIsZero(Add->getOperand(1),
14540                                   APInt::getAllOnesValue(Bits /* alignment */)
14541                                       .zext(Add.getScalarValueSizeInBits()))) {
14542           SDNode *BasePtr = Add->getOperand(0).getNode();
14543           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14544                                     UE = BasePtr->use_end();
14545                UI != UE; ++UI) {
14546             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14547                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14548                     IID) {
14549               // We've found another LVSL/LVSR, and this address is an aligned
14550               // multiple of that one. The results will be the same, so use the
14551               // one we've just found instead.
14552 
14553               return SDValue(*UI, 0);
14554             }
14555           }
14556         }
14557 
14558         if (isa<ConstantSDNode>(Add->getOperand(1))) {
14559           SDNode *BasePtr = Add->getOperand(0).getNode();
14560           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14561                UE = BasePtr->use_end(); UI != UE; ++UI) {
14562             if (UI->getOpcode() == ISD::ADD &&
14563                 isa<ConstantSDNode>(UI->getOperand(1)) &&
14564                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14565                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14566                 (1ULL << Bits) == 0) {
14567               SDNode *OtherAdd = *UI;
14568               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14569                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
14570                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14571                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14572                   return SDValue(*VI, 0);
14573                 }
14574               }
14575             }
14576           }
14577         }
14578       }
14579 
14580       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14581       // Expose the vabsduw/h/b opportunity for down stream
14582       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14583           (IID == Intrinsic::ppc_altivec_vmaxsw ||
14584            IID == Intrinsic::ppc_altivec_vmaxsh ||
14585            IID == Intrinsic::ppc_altivec_vmaxsb)) {
14586         SDValue V1 = N->getOperand(1);
14587         SDValue V2 = N->getOperand(2);
14588         if ((V1.getSimpleValueType() == MVT::v4i32 ||
14589              V1.getSimpleValueType() == MVT::v8i16 ||
14590              V1.getSimpleValueType() == MVT::v16i8) &&
14591             V1.getSimpleValueType() == V2.getSimpleValueType()) {
14592           // (0-a, a)
14593           if (V1.getOpcode() == ISD::SUB &&
14594               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14595               V1.getOperand(1) == V2) {
14596             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14597           }
14598           // (a, 0-a)
14599           if (V2.getOpcode() == ISD::SUB &&
14600               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14601               V2.getOperand(1) == V1) {
14602             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14603           }
14604           // (x-y, y-x)
14605           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14606               V1.getOperand(0) == V2.getOperand(1) &&
14607               V1.getOperand(1) == V2.getOperand(0)) {
14608             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14609           }
14610         }
14611       }
14612     }
14613 
14614     break;
14615   case ISD::INTRINSIC_W_CHAIN:
14616     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14617     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14618     if (Subtarget.needsSwapsForVSXMemOps()) {
14619       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14620       default:
14621         break;
14622       case Intrinsic::ppc_vsx_lxvw4x:
14623       case Intrinsic::ppc_vsx_lxvd2x:
14624         return expandVSXLoadForLE(N, DCI);
14625       }
14626     }
14627     break;
14628   case ISD::INTRINSIC_VOID:
14629     // For little endian, VSX stores require generating xxswapd/stxvd2x.
14630     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14631     if (Subtarget.needsSwapsForVSXMemOps()) {
14632       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14633       default:
14634         break;
14635       case Intrinsic::ppc_vsx_stxvw4x:
14636       case Intrinsic::ppc_vsx_stxvd2x:
14637         return expandVSXStoreForLE(N, DCI);
14638       }
14639     }
14640     break;
14641   case ISD::BSWAP:
14642     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14643     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14644         N->getOperand(0).hasOneUse() &&
14645         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14646          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14647           N->getValueType(0) == MVT::i64))) {
14648       SDValue Load = N->getOperand(0);
14649       LoadSDNode *LD = cast<LoadSDNode>(Load);
14650       // Create the byte-swapping load.
14651       SDValue Ops[] = {
14652         LD->getChain(),    // Chain
14653         LD->getBasePtr(),  // Ptr
14654         DAG.getValueType(N->getValueType(0)) // VT
14655       };
14656       SDValue BSLoad =
14657         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
14658                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14659                                               MVT::i64 : MVT::i32, MVT::Other),
14660                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
14661 
14662       // If this is an i16 load, insert the truncate.
14663       SDValue ResVal = BSLoad;
14664       if (N->getValueType(0) == MVT::i16)
14665         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
14666 
14667       // First, combine the bswap away.  This makes the value produced by the
14668       // load dead.
14669       DCI.CombineTo(N, ResVal);
14670 
14671       // Next, combine the load away, we give it a bogus result value but a real
14672       // chain result.  The result value is dead because the bswap is dead.
14673       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14674 
14675       // Return N so it doesn't get rechecked!
14676       return SDValue(N, 0);
14677     }
14678     break;
14679   case PPCISD::VCMP:
14680     // If a VCMPo node already exists with exactly the same operands as this
14681     // node, use its result instead of this node (VCMPo computes both a CR6 and
14682     // a normal output).
14683     //
14684     if (!N->getOperand(0).hasOneUse() &&
14685         !N->getOperand(1).hasOneUse() &&
14686         !N->getOperand(2).hasOneUse()) {
14687 
14688       // Scan all of the users of the LHS, looking for VCMPo's that match.
14689       SDNode *VCMPoNode = nullptr;
14690 
14691       SDNode *LHSN = N->getOperand(0).getNode();
14692       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14693            UI != E; ++UI)
14694         if (UI->getOpcode() == PPCISD::VCMPo &&
14695             UI->getOperand(1) == N->getOperand(1) &&
14696             UI->getOperand(2) == N->getOperand(2) &&
14697             UI->getOperand(0) == N->getOperand(0)) {
14698           VCMPoNode = *UI;
14699           break;
14700         }
14701 
14702       // If there is no VCMPo node, or if the flag value has a single use, don't
14703       // transform this.
14704       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
14705         break;
14706 
14707       // Look at the (necessarily single) use of the flag value.  If it has a
14708       // chain, this transformation is more complex.  Note that multiple things
14709       // could use the value result, which we should ignore.
14710       SDNode *FlagUser = nullptr;
14711       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
14712            FlagUser == nullptr; ++UI) {
14713         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
14714         SDNode *User = *UI;
14715         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14716           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
14717             FlagUser = User;
14718             break;
14719           }
14720         }
14721       }
14722 
14723       // If the user is a MFOCRF instruction, we know this is safe.
14724       // Otherwise we give up for right now.
14725       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14726         return SDValue(VCMPoNode, 0);
14727     }
14728     break;
14729   case ISD::BRCOND: {
14730     SDValue Cond = N->getOperand(1);
14731     SDValue Target = N->getOperand(2);
14732 
14733     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14734         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
14735           Intrinsic::loop_decrement) {
14736 
14737       // We now need to make the intrinsic dead (it cannot be instruction
14738       // selected).
14739       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14740       assert(Cond.getNode()->hasOneUse() &&
14741              "Counter decrement has more than one use");
14742 
14743       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14744                          N->getOperand(0), Target);
14745     }
14746   }
14747   break;
14748   case ISD::BR_CC: {
14749     // If this is a branch on an altivec predicate comparison, lower this so
14750     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
14751     // lowering is done pre-legalize, because the legalizer lowers the predicate
14752     // compare down to code that is difficult to reassemble.
14753     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14754     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14755 
14756     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
14757     // value. If so, pass-through the AND to get to the intrinsic.
14758     if (LHS.getOpcode() == ISD::AND &&
14759         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14760         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
14761           Intrinsic::loop_decrement &&
14762         isa<ConstantSDNode>(LHS.getOperand(1)) &&
14763         !isNullConstant(LHS.getOperand(1)))
14764       LHS = LHS.getOperand(0);
14765 
14766     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14767         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
14768           Intrinsic::loop_decrement &&
14769         isa<ConstantSDNode>(RHS)) {
14770       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14771              "Counter decrement comparison is not EQ or NE");
14772 
14773       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14774       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14775                     (CC == ISD::SETNE && !Val);
14776 
14777       // We now need to make the intrinsic dead (it cannot be instruction
14778       // selected).
14779       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14780       assert(LHS.getNode()->hasOneUse() &&
14781              "Counter decrement has more than one use");
14782 
14783       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14784                          N->getOperand(0), N->getOperand(4));
14785     }
14786 
14787     int CompareOpc;
14788     bool isDot;
14789 
14790     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14791         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14792         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14793       assert(isDot && "Can't compare against a vector result!");
14794 
14795       // If this is a comparison against something other than 0/1, then we know
14796       // that the condition is never/always true.
14797       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14798       if (Val != 0 && Val != 1) {
14799         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
14800           return N->getOperand(0);
14801         // Always !=, turn it into an unconditional branch.
14802         return DAG.getNode(ISD::BR, dl, MVT::Other,
14803                            N->getOperand(0), N->getOperand(4));
14804       }
14805 
14806       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14807 
14808       // Create the PPCISD altivec 'dot' comparison node.
14809       SDValue Ops[] = {
14810         LHS.getOperand(2),  // LHS of compare
14811         LHS.getOperand(3),  // RHS of compare
14812         DAG.getConstant(CompareOpc, dl, MVT::i32)
14813       };
14814       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14815       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
14816 
14817       // Unpack the result based on how the target uses it.
14818       PPC::Predicate CompOpc;
14819       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14820       default:  // Can't happen, don't crash on invalid number though.
14821       case 0:   // Branch on the value of the EQ bit of CR6.
14822         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
14823         break;
14824       case 1:   // Branch on the inverted value of the EQ bit of CR6.
14825         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
14826         break;
14827       case 2:   // Branch on the value of the LT bit of CR6.
14828         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
14829         break;
14830       case 3:   // Branch on the inverted value of the LT bit of CR6.
14831         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
14832         break;
14833       }
14834 
14835       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14836                          DAG.getConstant(CompOpc, dl, MVT::i32),
14837                          DAG.getRegister(PPC::CR6, MVT::i32),
14838                          N->getOperand(4), CompNode.getValue(1));
14839     }
14840     break;
14841   }
14842   case ISD::BUILD_VECTOR:
14843     return DAGCombineBuildVector(N, DCI);
14844   case ISD::ABS:
14845     return combineABS(N, DCI);
14846   case ISD::VSELECT:
14847     return combineVSelect(N, DCI);
14848   }
14849 
14850   return SDValue();
14851 }
14852 
14853 SDValue
14854 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
14855                                  SelectionDAG &DAG,
14856                                  SmallVectorImpl<SDNode *> &Created) const {
14857   // fold (sdiv X, pow2)
14858   EVT VT = N->getValueType(0);
14859   if (VT == MVT::i64 && !Subtarget.isPPC64())
14860     return SDValue();
14861   if ((VT != MVT::i32 && VT != MVT::i64) ||
14862       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14863     return SDValue();
14864 
14865   SDLoc DL(N);
14866   SDValue N0 = N->getOperand(0);
14867 
14868   bool IsNegPow2 = (-Divisor).isPowerOf2();
14869   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14870   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14871 
14872   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14873   Created.push_back(Op.getNode());
14874 
14875   if (IsNegPow2) {
14876     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14877     Created.push_back(Op.getNode());
14878   }
14879 
14880   return Op;
14881 }
14882 
14883 //===----------------------------------------------------------------------===//
14884 // Inline Assembly Support
14885 //===----------------------------------------------------------------------===//
14886 
// Compute bits of Op that are known to be zero/one for PPC-specific nodes.
// Anything not handled here leaves Known fully reset (nothing known).
// DemandedElts and Depth are unused by the cases below.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    // Operand 2 carries the memory VT of the byte-reversed load; only the
    // i16 form guarantees the upper halfword is zero.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // Altivec predicate-form (_p) comparisons produce a 0/1 result, so every
    // bit above the low one is known zero.
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}
14926 
14927 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
14928   switch (Subtarget.getCPUDirective()) {
14929   default: break;
14930   case PPC::DIR_970:
14931   case PPC::DIR_PWR4:
14932   case PPC::DIR_PWR5:
14933   case PPC::DIR_PWR5X:
14934   case PPC::DIR_PWR6:
14935   case PPC::DIR_PWR6X:
14936   case PPC::DIR_PWR7:
14937   case PPC::DIR_PWR8:
14938   case PPC::DIR_PWR9:
14939   case PPC::DIR_PWR10:
14940   case PPC::DIR_PWR_FUTURE: {
14941     if (!ML)
14942       break;
14943 
14944     if (!DisableInnermostLoopAlign32) {
14945       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
14946       // so that we can decrease cache misses and branch-prediction misses.
14947       // Actual alignment of the loop will depend on the hotness check and other
14948       // logic in alignBlocks.
14949       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14950         return Align(32);
14951     }
14952 
14953     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14954 
14955     // For small loops (between 5 and 8 instructions), align to a 32-byte
14956     // boundary so that the entire loop fits in one instruction-cache line.
14957     uint64_t LoopSize = 0;
14958     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14959       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14960         LoopSize += TII->getInstSizeInBytes(*J);
14961         if (LoopSize > 32)
14962           break;
14963       }
14964 
14965     if (LoopSize > 16 && LoopSize <= 32)
14966       return Align(32);
14967 
14968     break;
14969   }
14970   }
14971 
14972   return TargetLowering::getPrefLoopAlignment(ML);
14973 }
14974 
14975 /// getConstraintType - Given a constraint, return the type of
14976 /// constraint it is for this target.
14977 PPCTargetLowering::ConstraintType
14978 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
14979   if (Constraint.size() == 1) {
14980     switch (Constraint[0]) {
14981     default: break;
14982     case 'b':
14983     case 'r':
14984     case 'f':
14985     case 'd':
14986     case 'v':
14987     case 'y':
14988       return C_RegisterClass;
14989     case 'Z':
14990       // FIXME: While Z does indicate a memory constraint, it specifically
14991       // indicates an r+r address (used in conjunction with the 'y' modifier
14992       // in the replacement string). Currently, we're forcing the base
14993       // register to be r0 in the asm printer (which is interpreted as zero)
14994       // and forming the complete address in the second register. This is
14995       // suboptimal.
14996       return C_Memory;
14997     }
14998   } else if (Constraint == "wc") { // individual CR bits.
14999     return C_RegisterClass;
15000   } else if (Constraint == "wa" || Constraint == "wd" ||
15001              Constraint == "wf" || Constraint == "ws" ||
15002              Constraint == "wi" || Constraint == "ww") {
15003     return C_RegisterClass; // VSX registers.
15004   }
15005   return TargetLowering::getConstraintType(Constraint);
15006 }
15007 
15008 /// Examine constraint type and operand type and determine a weight value.
15009 /// This object must already have been set up with the operand type
15010 /// and the current alternative constraint selected.
15011 TargetLowering::ConstraintWeight
15012 PPCTargetLowering::getSingleConstraintMatchWeight(
15013     AsmOperandInfo &info, const char *constraint) const {
15014   ConstraintWeight weight = CW_Invalid;
15015   Value *CallOperandVal = info.CallOperandVal;
15016     // If we don't have a value, we can't do a match,
15017     // but allow it at the lowest weight.
15018   if (!CallOperandVal)
15019     return CW_Default;
15020   Type *type = CallOperandVal->getType();
15021 
15022   // Look at the constraint type.
15023   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15024     return CW_Register; // an individual CR bit.
15025   else if ((StringRef(constraint) == "wa" ||
15026             StringRef(constraint) == "wd" ||
15027             StringRef(constraint) == "wf") &&
15028            type->isVectorTy())
15029     return CW_Register;
15030   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15031     return CW_Register; // just hold 64-bit integers data.
15032   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15033     return CW_Register;
15034   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15035     return CW_Register;
15036 
15037   switch (*constraint) {
15038   default:
15039     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15040     break;
15041   case 'b':
15042     if (type->isIntegerTy())
15043       weight = CW_Register;
15044     break;
15045   case 'f':
15046     if (type->isFloatTy())
15047       weight = CW_Register;
15048     break;
15049   case 'd':
15050     if (type->isDoubleTy())
15051       weight = CW_Register;
15052     break;
15053   case 'v':
15054     if (type->isVectorTy())
15055       weight = CW_Register;
15056     break;
15057   case 'y':
15058     weight = CW_Register;
15059     break;
15060   case 'Z':
15061     weight = CW_Memory;
15062     break;
15063   }
15064   return weight;
15065 }
15066 
// Map an inline-asm register constraint to a (register, register class) pair.
// Handles the single-letter GCC RS6000 constraints and the PPC-specific
// multi-letter "w*" VSX constraints, then defers to the generic TargetLowering
// handling for explicit register names, post-processing its result for two
// PPC quirks (r-vs-x 64-bit names and the 'cc' alias).
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      // With SPE, FP values live in GPRs (f32) or SPE register pairs (f64).
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    // Scalar-in-vector: f32 needs P8 vector support, f64 works everywhere.
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  // If we name a VSX register, we can't defer to the base class because it
  // will not recognize the correct register (their names will be VSL{0-31}
  // and V{0-31} so they won't match). So we match them here.
  // NOTE(review): the constraint has the form "{vsNN}", so the digits start
  // at index 3 and atoi stops at the closing brace; this presumes the
  // underlying buffer is terminated shortly after the digits — confirm.
  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
    int VSNum = atoi(Constraint.data() + 3);
    assert(VSNum >= 0 && VSNum <= 63 &&
           "Attempted to access a vsr out of range");
    // vs0-vs31 overlay the FP registers (VSL names); vs32-vs63 overlay the
    // Altivec registers (V names).
    if (VSNum < 32)
      return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
    return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
  }
  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}
15154 
15155 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15156 /// vector.  If it is invalid, don't add anything to Ops.
15157 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15158                                                      std::string &Constraint,
15159                                                      std::vector<SDValue>&Ops,
15160                                                      SelectionDAG &DAG) const {
15161   SDValue Result;
15162 
15163   // Only support length 1 constraints.
15164   if (Constraint.length() > 1) return;
15165 
15166   char Letter = Constraint[0];
15167   switch (Letter) {
15168   default: break;
15169   case 'I':
15170   case 'J':
15171   case 'K':
15172   case 'L':
15173   case 'M':
15174   case 'N':
15175   case 'O':
15176   case 'P': {
15177     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15178     if (!CST) return; // Must be an immediate to match.
15179     SDLoc dl(Op);
15180     int64_t Value = CST->getSExtValue();
15181     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15182                          // numbers are printed as such.
15183     switch (Letter) {
15184     default: llvm_unreachable("Unknown constraint letter!");
15185     case 'I':  // "I" is a signed 16-bit constant.
15186       if (isInt<16>(Value))
15187         Result = DAG.getTargetConstant(Value, dl, TCVT);
15188       break;
15189     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15190       if (isShiftedUInt<16, 16>(Value))
15191         Result = DAG.getTargetConstant(Value, dl, TCVT);
15192       break;
15193     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15194       if (isShiftedInt<16, 16>(Value))
15195         Result = DAG.getTargetConstant(Value, dl, TCVT);
15196       break;
15197     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15198       if (isUInt<16>(Value))
15199         Result = DAG.getTargetConstant(Value, dl, TCVT);
15200       break;
15201     case 'M':  // "M" is a constant that is greater than 31.
15202       if (Value > 31)
15203         Result = DAG.getTargetConstant(Value, dl, TCVT);
15204       break;
15205     case 'N':  // "N" is a positive constant that is an exact power of two.
15206       if (Value > 0 && isPowerOf2_64(Value))
15207         Result = DAG.getTargetConstant(Value, dl, TCVT);
15208       break;
15209     case 'O':  // "O" is the constant zero.
15210       if (Value == 0)
15211         Result = DAG.getTargetConstant(Value, dl, TCVT);
15212       break;
15213     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15214       if (isInt<16>(-Value))
15215         Result = DAG.getTargetConstant(Value, dl, TCVT);
15216       break;
15217     }
15218     break;
15219   }
15220   }
15221 
15222   if (Result.getNode()) {
15223     Ops.push_back(Result);
15224     return;
15225   }
15226 
15227   // Handle standard constraint letters.
15228   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15229 }
15230 
15231 // isLegalAddressingMode - Return true if the addressing mode represented
15232 // by AM is legal for this target, for a load/store of the specified type.
15233 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15234                                               const AddrMode &AM, Type *Ty,
15235                                               unsigned AS,
15236                                               Instruction *I) const {
15237   // Vector type r+i form is supported since power9 as DQ form. We don't check
15238   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15239   // imm form is preferred and the offset can be adjusted to use imm form later
15240   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15241   // max offset to check legal addressing mode, we should be a little aggressive
15242   // to contain other offsets for that LSRUse.
15243   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15244     return false;
15245 
15246   // PPC allows a sign-extended 16-bit immediate field.
15247   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15248     return false;
15249 
15250   // No global is ever allowed as a base.
15251   if (AM.BaseGV)
15252     return false;
15253 
15254   // PPC only support r+r,
15255   switch (AM.Scale) {
15256   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15257     break;
15258   case 1:
15259     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15260       return false;
15261     // Otherwise we have r+r or r+i.
15262     break;
15263   case 2:
15264     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15265       return false;
15266     // Allow 2*r as r+r.
15267     break;
15268   default:
15269     // No other scales are supported.
15270     return false;
15271   }
15272 
15273   return true;
15274 }
15275 
15276 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15277                                            SelectionDAG &DAG) const {
15278   MachineFunction &MF = DAG.getMachineFunction();
15279   MachineFrameInfo &MFI = MF.getFrameInfo();
15280   MFI.setReturnAddressIsTaken(true);
15281 
15282   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15283     return SDValue();
15284 
15285   SDLoc dl(Op);
15286   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15287 
15288   // Make sure the function does not optimize away the store of the RA to
15289   // the stack.
15290   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15291   FuncInfo->setLRStoreRequired();
15292   bool isPPC64 = Subtarget.isPPC64();
15293   auto PtrVT = getPointerTy(MF.getDataLayout());
15294 
15295   if (Depth > 0) {
15296     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15297     SDValue Offset =
15298         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15299                         isPPC64 ? MVT::i64 : MVT::i32);
15300     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15301                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15302                        MachinePointerInfo());
15303   }
15304 
15305   // Just load the return address off the stack.
15306   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15307   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15308                      MachinePointerInfo());
15309 }
15310 
15311 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15312                                           SelectionDAG &DAG) const {
15313   SDLoc dl(Op);
15314   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15315 
15316   MachineFunction &MF = DAG.getMachineFunction();
15317   MachineFrameInfo &MFI = MF.getFrameInfo();
15318   MFI.setFrameAddressIsTaken(true);
15319 
15320   EVT PtrVT = getPointerTy(MF.getDataLayout());
15321   bool isPPC64 = PtrVT == MVT::i64;
15322 
15323   // Naked functions never have a frame pointer, and so we use r1. For all
15324   // other functions, this decision must be delayed until during PEI.
15325   unsigned FrameReg;
15326   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15327     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15328   else
15329     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15330 
15331   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15332                                          PtrVT);
15333   while (Depth--)
15334     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15335                             FrameAddr, MachinePointerInfo());
15336   return FrameAddr;
15337 }
15338 
15339 // FIXME? Maybe this could be a TableGen attribute on some registers and
15340 // this table could be generated automatically from RegInfo.
15341 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15342                                               const MachineFunction &MF) const {
15343   bool isPPC64 = Subtarget.isPPC64();
15344 
15345   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15346   if (!is64Bit && VT != LLT::scalar(32))
15347     report_fatal_error("Invalid register global variable type");
15348 
15349   Register Reg = StringSwitch<Register>(RegName)
15350                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15351                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15352                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15353                      .Default(Register());
15354 
15355   if (Reg)
15356     return Reg;
15357   report_fatal_error("Invalid register name global variable");
15358 }
15359 
15360 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15361   // 32-bit SVR4 ABI access everything as got-indirect.
15362   if (Subtarget.is32BitELFABI())
15363     return true;
15364 
15365   // AIX accesses everything indirectly through the TOC, which is similar to
15366   // the GOT.
15367   if (Subtarget.isAIXABI())
15368     return true;
15369 
15370   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15371   // If it is small or large code model, module locals are accessed
15372   // indirectly by loading their address from .toc/.got.
15373   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15374     return true;
15375 
15376   // JumpTable and BlockAddress are accessed as got-indirect.
15377   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15378     return true;
15379 
15380   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15381     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15382 
15383   return false;
15384 }
15385 
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets, so never allow a constant
  // offset to be folded into a global address node.
  return false;
}
15391 
// Describe the memory access performed by a PPC memory intrinsic so the
// DAG builder can attach a MachineMemOperand to the node.
//
// For the Altivec/VSX load and store intrinsics handled here, Info is
// populated with:
//  - the scalar/vector type actually accessed (memVT),
//  - the pointer argument of the call (ptrVal),
//  - a conservative byte window of [-(storesize-1), +(storesize-1)] around
//    the pointer (offset/size). NOTE(review): this window appears to account
//    for the instructions ignoring/rounding the low bits of the effective
//    address, so any byte in that range may be touched — confirm against the
//    ISA description of lvx/stvx.
//  - alignment 1, since nothing stricter can be assumed here.
// Returns true when the intrinsic was recognized, false otherwise.
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  // Vector and element loads: the address is argument 0.
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    // Pick the in-memory type for the element loads; the full-vector loads
    // (lvx/lvxl/lxvw4x) default to v4i32.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    // Conservative window around the pointer (see function comment).
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  // Vector and element stores: the address is argument 1 (argument 0 is the
  // value being stored).
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    // Same type selection as the load side.
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align(1);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  default:
    break;
  }

  // Not a memory intrinsic we know how to describe.
  return false;
}
15473 
15474 /// It returns EVT::Other if the type should be determined using generic
15475 /// target-independent logic.
15476 EVT PPCTargetLowering::getOptimalMemOpType(
15477     const MemOp &Op, const AttributeList &FuncAttributes) const {
15478   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15479     // We should use Altivec/VSX loads and stores when available. For unaligned
15480     // addresses, unaligned VSX loads are only fast starting with the P8.
15481     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15482         (Op.isAligned(Align(16)) ||
15483          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15484       return MVT::v4i32;
15485   }
15486 
15487   if (Subtarget.isPPC64()) {
15488     return MVT::i64;
15489   }
15490 
15491   return MVT::i32;
15492 }
15493 
15494 /// Returns true if it is beneficial to convert a load of a constant
15495 /// to just the constant itself.
15496 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15497                                                           Type *Ty) const {
15498   assert(Ty->isIntegerTy());
15499 
15500   unsigned BitSize = Ty->getPrimitiveSizeInBits();
15501   return !(BitSize == 0 || BitSize > 64);
15502 }
15503 
15504 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15505   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15506     return false;
15507   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15508   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15509   return NumBits1 == 64 && NumBits2 == 32;
15510 }
15511 
15512 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15513   if (!VT1.isInteger() || !VT2.isInteger())
15514     return false;
15515   unsigned NumBits1 = VT1.getSizeInBits();
15516   unsigned NumBits2 = VT2.getSizeInBits();
15517   return NumBits1 == 64 && NumBits2 == 32;
15518 }
15519 
15520 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
15521   // Generally speaking, zexts are not free, but they are free when they can be
15522   // folded with other operations.
15523   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15524     EVT MemVT = LD->getMemoryVT();
15525     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15526          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15527         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15528          LD->getExtensionType() == ISD::ZEXTLOAD))
15529       return true;
15530   }
15531 
15532   // FIXME: Add other cases...
15533   //  - 32-bit shifts with a zext to i64
15534   //  - zext after ctlz, bswap, etc.
15535   //  - zext after and by a constant mask
15536 
15537   return TargetLowering::isZExtFree(Val, VT2);
15538 }
15539 
15540 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15541   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15542          "invalid fpext types");
15543   // Extending to float128 is not free.
15544   if (DestVT == MVT::f128)
15545     return false;
15546   return true;
15547 }
15548 
15549 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15550   return isInt<16>(Imm) || isUInt<16>(Imm);
15551 }
15552 
15553 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15554   return isInt<16>(Imm) || isUInt<16>(Imm);
15555 }
15556 
15557 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
15558                                                        unsigned,
15559                                                        unsigned,
15560                                                        MachineMemOperand::Flags,
15561                                                        bool *Fast) const {
15562   if (DisablePPCUnaligned)
15563     return false;
15564 
15565   // PowerPC supports unaligned memory access for simple non-vector types.
15566   // Although accessing unaligned addresses is not as efficient as accessing
15567   // aligned addresses, it is generally more efficient than manual expansion,
15568   // and generally only traps for software emulation when crossing page
15569   // boundaries.
15570 
15571   if (!VT.isSimple())
15572     return false;
15573 
15574   if (VT.isFloatingPoint() && !VT.isVector() &&
15575       !Subtarget.allowsUnalignedFPAccess())
15576     return false;
15577 
15578   if (VT.getSimpleVT().isVector()) {
15579     if (Subtarget.hasVSX()) {
15580       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15581           VT != MVT::v4f32 && VT != MVT::v4i32)
15582         return false;
15583     } else {
15584       return false;
15585     }
15586   }
15587 
15588   if (VT == MVT::ppcf128)
15589     return false;
15590 
15591   if (Fast)
15592     *Fast = true;
15593 
15594   return true;
15595 }
15596 
15597 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15598                                                    EVT VT) const {
15599   return isFMAFasterThanFMulAndFAdd(
15600       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15601 }
15602 
15603 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
15604                                                    Type *Ty) const {
15605   switch (Ty->getScalarType()->getTypeID()) {
15606   case Type::FloatTyID:
15607   case Type::DoubleTyID:
15608     return true;
15609   case Type::FP128TyID:
15610     return Subtarget.hasP9Vector();
15611   default:
15612     return false;
15613   }
15614 }
15615 
// FIXME: add more patterns which are not profitable to hoist.
/// Returns false when hoisting \p I would break a pattern that later passes
/// want to see intact (FMA formation, "store (load float*)" forwarding);
/// returns true (profitable to hoist) otherwise.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
  // Only single-use instructions can feed a foldable pattern in their user.
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA, PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;

    const TargetOptions &Options = getTargetMachine().Options;
    const Function *F = I->getFunction();
    const DataLayout &DL = F->getParent()->getDataLayout();
    Type *Ty = User->getOperand(0)->getType();

    // Hoisting is unprofitable (return false) only when all conditions for
    // forming an FMA hold: it is faster than fmul+fadd for this type, FMA is
    // legal, and FP contraction is permitted.
    return !(
        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
        isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
  }
  case Instruction::Load: {
    // Don't break "store (load float*)" pattern, this pattern will be combined
    // to "store (load int32)" in later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a float point takes more
    // cycles than loading a 32 bit integer.
    LoadInst *LI = cast<LoadInst>(I);
    // For the loads that combineLoadToOperationType does nothing, like
    // ordered load, it should be profitable to hoist them.
    // For swifterror load, it can only be used for pointer to pointer type, so
    // later type check should get rid of this case.
    if (!LI->isUnordered())
      return true;

    if (User->getOpcode() != Instruction::Store)
      return true;

    if (I->getType()->getTypeID() != Type::FloatTyID)
      return true;

    // An unordered float load feeding a store: keep load and store together.
    return false;
  }
  default:
    return true;
  }
  // Unreachable: every switch path above returns.
  return true;
}
15667 
15668 const MCPhysReg *
15669 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
15670   // LR is a callee-save register, but we must treat it as clobbered by any call
15671   // site. Hence we include LR in the scratch registers, which are in turn added
15672   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15673   // to CTR, which is used by any indirect call.
15674   static const MCPhysReg ScratchRegs[] = {
15675     PPC::X12, PPC::LR8, PPC::CTR8, 0
15676   };
15677 
15678   return ScratchRegs;
15679 }
15680 
15681 Register PPCTargetLowering::getExceptionPointerRegister(
15682     const Constant *PersonalityFn) const {
15683   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15684 }
15685 
15686 Register PPCTargetLowering::getExceptionSelectorRegister(
15687     const Constant *PersonalityFn) const {
15688   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15689 }
15690 
15691 bool
15692 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
15693                      EVT VT , unsigned DefinedValues) const {
15694   if (VT == MVT::v2i64)
15695     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15696 
15697   if (Subtarget.hasVSX())
15698     return true;
15699 
15700   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
15701 }
15702 
15703 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
15704   if (DisableILPPref || Subtarget.enableMachineScheduler())
15705     return TargetLowering::getSchedulingPreference(N);
15706 
15707   return Sched::ILP;
15708 }
15709 
15710 // Create a fast isel object.
15711 FastISel *
15712 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
15713                                   const TargetLibraryInfo *LibInfo) const {
15714   return PPC::createFastISel(FuncInfo, LibInfo);
15715 }
15716 
15717 // 'Inverted' means the FMA opcode after negating one multiplicand.
15718 // For example, (fma -a b c) = (fnmsub a b c)
15719 static unsigned invertFMAOpcode(unsigned Opc) {
15720   switch (Opc) {
15721   default:
15722     llvm_unreachable("Invalid FMA opcode for PowerPC!");
15723   case ISD::FMA:
15724     return PPCISD::FNMSUB;
15725   case PPCISD::FNMSUB:
15726     return ISD::FMA;
15727   }
15728 }
15729 
/// PPC override of negation folding: knows how to push an fneg through
/// PPCISD::FNMSUB by negating one multiplicand and the addend, or by
/// converting FNMSUB back into FMA. Falls back to the generic
/// implementation for everything else. \p Cost is set to the cost of the
/// chosen negation; returns an empty SDValue if no fold applies.
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  // Bail out on deep recursion; this routine calls itself on the operands.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    // Only fold when we are the sole user and the type is legal.
    if (!Op.hasOneUse() || !isTypeLegal(VT))
      break;

    const TargetOptions &Options = getTargetMachine().Options;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);
    SDLoc Loc(Op);

    // Every fold below needs the addend negated; if that is impossible,
    // give up entirely.
    NegatibleCost N2Cost = NegatibleCost::Expensive;
    SDValue NegN2 =
        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);

    if (!NegN2)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
      // Try and choose the cheaper one to negate.
      NegatibleCost N0Cost = NegatibleCost::Expensive;
      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
                                           N0Cost, Depth + 1);

      NegatibleCost N1Cost = NegatibleCost::Expensive;
      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
                                           N1Cost, Depth + 1);

      // Prefer negating N0 on a tie; the overall cost is the better of the
      // multiplicand's cost and the addend's cost.
      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(N0Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(N1Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
    }

    break;
  }

  // No PPC-specific fold matched; use the generic logic.
  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
15794 
15795 // Override to enable LOAD_STACK_GUARD lowering on Linux.
15796 bool PPCTargetLowering::useLoadStackGuardNode() const {
15797   if (!Subtarget.isTargetLinux())
15798     return TargetLowering::useLoadStackGuardNode();
15799   return true;
15800 }
15801 
15802 // Override to disable global variable loading on Linux.
15803 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
15804   if (!Subtarget.isTargetLinux())
15805     return TargetLowering::insertSSPDeclarations(M);
15806 }
15807 
15808 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
15809                                      bool ForCodeSize) const {
15810   if (!VT.isSimple() || !Subtarget.hasVSX())
15811     return false;
15812 
15813   switch(VT.getSimpleVT().SimpleTy) {
15814   default:
15815     // For FP types that are currently not supported by PPC backend, return
15816     // false. Examples: f16, f80.
15817     return false;
15818   case MVT::f32:
15819   case MVT::f64:
15820     if (Subtarget.hasPrefixInstrs()) {
15821       // With prefixed instructions, we can materialize anything that can be
15822       // represented with a 32-bit immediate, not just positive zero.
15823       APFloat APFloatOfImm = Imm;
15824       return convertToNonDenormSingle(APFloatOfImm);
15825     }
15826     LLVM_FALLTHROUGH;
15827   case MVT::ppcf128:
15828     return Imm.isPosZero();
15829   }
15830 }
15831 
15832 // For vector shift operation op, fold
15833 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
15834 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
15835                                   SelectionDAG &DAG) {
15836   SDValue N0 = N->getOperand(0);
15837   SDValue N1 = N->getOperand(1);
15838   EVT VT = N0.getValueType();
15839   unsigned OpSizeInBits = VT.getScalarSizeInBits();
15840   unsigned Opcode = N->getOpcode();
15841   unsigned TargetOpcode;
15842 
15843   switch (Opcode) {
15844   default:
15845     llvm_unreachable("Unexpected shift operation");
15846   case ISD::SHL:
15847     TargetOpcode = PPCISD::SHL;
15848     break;
15849   case ISD::SRL:
15850     TargetOpcode = PPCISD::SRL;
15851     break;
15852   case ISD::SRA:
15853     TargetOpcode = PPCISD::SRA;
15854     break;
15855   }
15856 
15857   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15858       N1->getOpcode() == ISD::AND)
15859     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15860       if (Mask->getZExtValue() == OpSizeInBits - 1)
15861         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15862 
15863   return SDValue();
15864 }
15865 
/// SHL DAG combine: strip redundant modulo masks on vector shifts, and on
/// ISA 3.0 fuse (shl (sign_extend i32 x), C) into PPCISD::EXTSWSLI
/// (extend-sign-word-and-shift-left-immediate).
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  // EXTSWSLI requires ISA 3.0, an i64 shift of a sign-extended i32 value,
  // and a constant shift amount.
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 ||
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  // Note: the EXTSWSLI operand is the un-extended i32 source of the
  // sign_extend, not the sign_extend itself.
  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}
15895 
15896 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15897   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15898     return Value;
15899 
15900   return SDValue();
15901 }
15902 
15903 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15904   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15905     return Value;
15906 
15907   return SDValue();
15908 }
15909 
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
//
// The idea: (Z != C) and (Z == C) can each be computed as the carry-out of
// an arithmetic operation on (Z - C), which addze then folds into the add.
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  // addze/addic/subfic forms below are 64-bit only.
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Matches a single-use (zext i64 (setcc i64 Z, C)) where -C fits in the
  // 16-bit signed immediate field of addi.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // Result VTs: the i64 sum plus glue for the carry chain.
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  // Safe: isZextOfCompareWithConstant verified operand 1 is a constant.
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant Should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // (Z - C) + (-1) carries out exactly when Z != C.
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
    }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    // 0 - (Z - C) carries out exactly when Z == C.
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
    }
  }

  return SDValue();
}
15996 
15997 // Transform
15998 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
15999 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16000 // In this case both C1 and C2 must be known constants.
16001 // C1+C2 must fit into a 34 bit signed integer.
16002 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16003                                           const PPCSubtarget &Subtarget) {
16004   if (!Subtarget.isUsingPCRelativeCalls())
16005     return SDValue();
16006 
16007   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16008   // If we find that node try to cast the Global Address and the Constant.
16009   SDValue LHS = N->getOperand(0);
16010   SDValue RHS = N->getOperand(1);
16011 
16012   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16013     std::swap(LHS, RHS);
16014 
16015   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16016     return SDValue();
16017 
16018   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16019   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16020   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16021 
16022   // Check that both casts succeeded.
16023   if (!GSDN || !ConstNode)
16024     return SDValue();
16025 
16026   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16027   SDLoc DL(GSDN);
16028 
16029   // The signed int offset needs to fit in 34 bits.
16030   if (!isInt<34>(NewOffset))
16031     return SDValue();
16032 
16033   // The new global address is a copy of the old global address except
16034   // that it has the updated Offset.
16035   SDValue GA =
16036       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16037                                  NewOffset, GSDN->getTargetFlags());
16038   SDValue MatPCRel =
16039       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16040   return MatPCRel;
16041 }
16042 
16043 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16044   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16045     return Value;
16046 
16047   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16048     return Value;
16049 
16050   return SDValue();
16051 }
16052 
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situtation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
// Also handles the vabsd combine (abs of a difference of zero-extends) for
// P9 Altivec, and the CRBits truncation combine.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
    // Only the byte/halfword/word vector forms exist.
    EVT VT = N->getValueType(0);
    if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
      return SDValue();
    SDValue Sub = Op0.getOperand(0);
    if (Sub.getOpcode() == ISD::SUB) {
      SDValue SubOp0 = Sub.getOperand(0);
      SDValue SubOp1 = Sub.getOperand(1);
      if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
          (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
        return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
                               SubOp1.getOperand(0),
                               DCI.DAG.getTargetConstant(0, dl, MVT::i32));
      }
    }
  }

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  // With no shift, the truncate keeps the low half: element 1 on big-endian,
  // element 0 on little-endian (of the v2i64 view built below).
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    // View the f128 as v2i64 and extract the requested half directly,
    // avoiding the store/reload an illegal i128 would otherwise need.
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}
16122 
/// MUL DAG combine: rewrite a multiply by a constant of the form 2^N+1 or
/// 2^N-1 (or their negations) into shift+add/sub sequences when that is
/// faster on the current subtarget.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Need a constant (or constant splat) multiplier.
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  // Profitability is decided per CPU from the cycle counts tabulated below.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    // Negate via (0 - Res).
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    // Not a 2^N±1 multiplier; leave the mul alone.
    return SDValue();
  }
}
16207 
16208 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16209 // in combiner since we need to check SD flags and other subtarget features.
16210 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16211                                           DAGCombinerInfo &DCI) const {
16212   SDValue N0 = N->getOperand(0);
16213   SDValue N1 = N->getOperand(1);
16214   SDValue N2 = N->getOperand(2);
16215   SDNodeFlags Flags = N->getFlags();
16216   EVT VT = N->getValueType(0);
16217   SelectionDAG &DAG = DCI.DAG;
16218   const TargetOptions &Options = getTargetMachine().Options;
16219   unsigned Opc = N->getOpcode();
16220   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16221   bool LegalOps = !DCI.isBeforeLegalizeOps();
16222   SDLoc Loc(N);
16223 
16224   if (!isOperationLegal(ISD::FMA, VT))
16225     return SDValue();
16226 
16227   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16228   // since (fnmsub a b c)=-0 while c-ab=+0.
16229   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16230     return SDValue();
16231 
16232   // (fma (fneg a) b c) => (fnmsub a b c)
16233   // (fnmsub (fneg a) b c) => (fma a b c)
16234   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16235     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16236 
16237   // (fma a (fneg b) c) => (fnmsub a b c)
16238   // (fnmsub a (fneg b) c) => (fma a b c)
16239   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16240     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16241 
16242   return SDValue();
16243 }
16244 
16245 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16246   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16247   if (!Subtarget.is64BitELFABI())
16248     return false;
16249 
16250   // If not a tail call then no need to proceed.
16251   if (!CI->isTailCall())
16252     return false;
16253 
16254   // If sibling calls have been disabled and tail-calls aren't guaranteed
16255   // there is no reason to duplicate.
16256   auto &TM = getTargetMachine();
16257   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16258     return false;
16259 
16260   // Can't tail call a function called indirectly, or if it has variadic args.
16261   const Function *Callee = CI->getCalledFunction();
16262   if (!Callee || Callee->isVarArg())
16263     return false;
16264 
16265   // Make sure the callee and caller calling conventions are eligible for tco.
16266   const Function *Caller = CI->getParent()->getParent();
16267   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16268                                            CI->getCallingConv()))
16269       return false;
16270 
16271   // If the function is local then we have a good chance at tail-calling it
16272   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16273 }
16274 
16275 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16276   if (!Subtarget.hasVSX())
16277     return false;
16278   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16279     return true;
16280   return VT == MVT::f32 || VT == MVT::f64 ||
16281     VT == MVT::v4f32 || VT == MVT::v2f64;
16282 }
16283 
16284 bool PPCTargetLowering::
16285 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16286   const Value *Mask = AndI.getOperand(1);
16287   // If the mask is suitable for andi. or andis. we should sink the and.
16288   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16289     // Can't handle constants wider than 64-bits.
16290     if (CI->getBitWidth() > 64)
16291       return false;
16292     int64_t ConstVal = CI->getZExtValue();
16293     return isUInt<16>(ConstVal) ||
16294       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16295   }
16296 
16297   // For non-constant masks, we can always use the record-form and.
16298   return true;
16299 }
16300 
16301 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16302 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16303 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16304 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16305 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16306 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16307   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16308   assert(Subtarget.hasP9Altivec() &&
16309          "Only combine this when P9 altivec supported!");
16310   EVT VT = N->getValueType(0);
16311   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16312     return SDValue();
16313 
16314   SelectionDAG &DAG = DCI.DAG;
16315   SDLoc dl(N);
16316   if (N->getOperand(0).getOpcode() == ISD::SUB) {
16317     // Even for signed integers, if it's known to be positive (as signed
16318     // integer) due to zero-extended inputs.
16319     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16320     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16321     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16322          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16323         (SubOpcd1 == ISD::ZERO_EXTEND ||
16324          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16325       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16326                          N->getOperand(0)->getOperand(0),
16327                          N->getOperand(0)->getOperand(1),
16328                          DAG.getTargetConstant(0, dl, MVT::i32));
16329     }
16330 
16331     // For type v4i32, it can be optimized with xvnegsp + vabsduw
16332     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16333         N->getOperand(0).hasOneUse()) {
16334       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16335                          N->getOperand(0)->getOperand(0),
16336                          N->getOperand(0)->getOperand(1),
16337                          DAG.getTargetConstant(1, dl, MVT::i32));
16338     }
16339   }
16340 
16341   return SDValue();
16342 }
16343 
16344 // For type v4i32/v8ii16/v16i8, transform
16345 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16346 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16347 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16348 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16349 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16350                                           DAGCombinerInfo &DCI) const {
16351   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16352   assert(Subtarget.hasP9Altivec() &&
16353          "Only combine this when P9 altivec supported!");
16354 
16355   SelectionDAG &DAG = DCI.DAG;
16356   SDLoc dl(N);
16357   SDValue Cond = N->getOperand(0);
16358   SDValue TrueOpnd = N->getOperand(1);
16359   SDValue FalseOpnd = N->getOperand(2);
16360   EVT VT = N->getOperand(1).getValueType();
16361 
16362   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16363       FalseOpnd.getOpcode() != ISD::SUB)
16364     return SDValue();
16365 
16366   // ABSD only available for type v4i32/v8i16/v16i8
16367   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16368     return SDValue();
16369 
16370   // At least to save one more dependent computation
16371   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16372     return SDValue();
16373 
16374   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16375 
16376   // Can only handle unsigned comparison here
16377   switch (CC) {
16378   default:
16379     return SDValue();
16380   case ISD::SETUGT:
16381   case ISD::SETUGE:
16382     break;
16383   case ISD::SETULT:
16384   case ISD::SETULE:
16385     std::swap(TrueOpnd, FalseOpnd);
16386     break;
16387   }
16388 
16389   SDValue CmpOpnd1 = Cond.getOperand(0);
16390   SDValue CmpOpnd2 = Cond.getOperand(1);
16391 
16392   // SETCC CmpOpnd1 CmpOpnd2 cond
16393   // TrueOpnd = CmpOpnd1 - CmpOpnd2
16394   // FalseOpnd = CmpOpnd2 - CmpOpnd1
16395   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16396       TrueOpnd.getOperand(1) == CmpOpnd2 &&
16397       FalseOpnd.getOperand(0) == CmpOpnd2 &&
16398       FalseOpnd.getOperand(1) == CmpOpnd1) {
16399     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16400                        CmpOpnd1, CmpOpnd2,
16401                        DAG.getTargetConstant(0, dl, MVT::i32));
16402   }
16403 
16404   return SDValue();
16405 }
16406