1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 // TODO - Remove this option if soft fp128 has been fully supported .
125 static cl::opt<bool>
126     EnableSoftFP128("enable-soft-fp128",
127                     cl::desc("temp option to enable soft fp128"), cl::Hidden);
128 
129 STATISTIC(NumTailCalls, "Number of tail calls");
130 STATISTIC(NumSiblingCalls, "Number of sibling calls");
131 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133 
134 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135 
136 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137 
138 // FIXME: Remove this once the bug has been fixed!
139 extern cl::opt<bool> ANDIGlueBug;
140 
141 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142                                      const PPCSubtarget &STI)
143     : TargetLowering(TM), Subtarget(STI) {
144   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145   // arguments are at least 4/8 bytes aligned.
146   bool isPPC64 = Subtarget.isPPC64();
147   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148 
149   // Set up the register classes.
150   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151   if (!useSoftFloat()) {
152     if (hasSPE()) {
153       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154       // EFPU2 APU only supports f32
155       if (!Subtarget.hasEFPU2())
156         addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
157     } else {
158       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
159       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
160     }
161   }
162 
163   // Match BITREVERSE to customized fast code sequence in the td file.
164   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
165   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
166 
167   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
168   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
169 
170   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
171   for (MVT VT : MVT::integer_valuetypes()) {
172     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
173     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
174   }
175 
176   if (Subtarget.isISA3_0()) {
177     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
178     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
179     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
180     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
181   } else {
182     // No extending loads from f16 or HW conversions back and forth.
183     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
184     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
185     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
186     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
187     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
188     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
189     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
190     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
191   }
192 
193   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
194 
195   // PowerPC has pre-inc load and store's.
196   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
197   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
198   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
199   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
200   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
201   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
202   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
203   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
204   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
205   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
206   if (!Subtarget.hasSPE()) {
207     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
208     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
209     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
210     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
211   }
212 
213   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
214   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
215   for (MVT VT : ScalarIntVTs) {
216     setOperationAction(ISD::ADDC, VT, Legal);
217     setOperationAction(ISD::ADDE, VT, Legal);
218     setOperationAction(ISD::SUBC, VT, Legal);
219     setOperationAction(ISD::SUBE, VT, Legal);
220   }
221 
222   if (Subtarget.useCRBits()) {
223     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
224 
225     if (isPPC64 || Subtarget.hasFPCVT()) {
226       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
227       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
228                         isPPC64 ? MVT::i64 : MVT::i32);
229       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
230       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
231                         isPPC64 ? MVT::i64 : MVT::i32);
232 
233       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
234       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
235                          isPPC64 ? MVT::i64 : MVT::i32);
236       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
237       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
238                         isPPC64 ? MVT::i64 : MVT::i32);
239 
240       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
241       AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
242                         isPPC64 ? MVT::i64 : MVT::i32);
243       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
244       AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
245                         isPPC64 ? MVT::i64 : MVT::i32);
246 
247       setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
248       AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
249                         isPPC64 ? MVT::i64 : MVT::i32);
250       setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
251       AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
252                         isPPC64 ? MVT::i64 : MVT::i32);
253     } else {
254       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
255       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
256       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
257       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
258     }
259 
260     // PowerPC does not support direct load/store of condition registers.
261     setOperationAction(ISD::LOAD, MVT::i1, Custom);
262     setOperationAction(ISD::STORE, MVT::i1, Custom);
263 
264     // FIXME: Remove this once the ANDI glue bug is fixed:
265     if (ANDIGlueBug)
266       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
267 
268     for (MVT VT : MVT::integer_valuetypes()) {
269       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
270       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
271       setTruncStoreAction(VT, MVT::i1, Expand);
272     }
273 
274     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
275   }
276 
277   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
278   // PPC (the libcall is not available).
279   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
280   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
281   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
282   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
283 
284   // We do not currently implement these libm ops for PowerPC.
285   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
286   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
287   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
288   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
289   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
290   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
291 
292   // PowerPC has no SREM/UREM instructions unless we are on P9
293   // On P9 we may use a hardware instruction to compute the remainder.
294   // When the result of both the remainder and the division is required it is
295   // more efficient to compute the remainder from the result of the division
296   // rather than use the remainder instruction. The instructions are legalized
297   // directly because the DivRemPairsPass performs the transformation at the IR
298   // level.
299   if (Subtarget.isISA3_0()) {
300     setOperationAction(ISD::SREM, MVT::i32, Legal);
301     setOperationAction(ISD::UREM, MVT::i32, Legal);
302     setOperationAction(ISD::SREM, MVT::i64, Legal);
303     setOperationAction(ISD::UREM, MVT::i64, Legal);
304   } else {
305     setOperationAction(ISD::SREM, MVT::i32, Expand);
306     setOperationAction(ISD::UREM, MVT::i32, Expand);
307     setOperationAction(ISD::SREM, MVT::i64, Expand);
308     setOperationAction(ISD::UREM, MVT::i64, Expand);
309   }
310 
311   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
312   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
313   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
314   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
315   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
316   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
317   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
318   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
319   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
320 
321   // Handle constrained floating-point operations of scalar.
322   // TODO: Handle SPE specific operation.
323   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
324   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
325   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
326   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
327   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
328   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
329 
330   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
331   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
332   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
333   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
334   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
335   if (Subtarget.hasVSX()) {
336     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
337     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
338   }
339 
340   if (Subtarget.hasFSQRT()) {
341     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
342     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
343   }
344 
345   if (Subtarget.hasFPRND()) {
346     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
347     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
348     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
349     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
350 
351     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
352     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
353     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
354     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
355   }
356 
357   // We don't support sin/cos/sqrt/fmod/pow
358   setOperationAction(ISD::FSIN , MVT::f64, Expand);
359   setOperationAction(ISD::FCOS , MVT::f64, Expand);
360   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
361   setOperationAction(ISD::FREM , MVT::f64, Expand);
362   setOperationAction(ISD::FPOW , MVT::f64, Expand);
363   setOperationAction(ISD::FSIN , MVT::f32, Expand);
364   setOperationAction(ISD::FCOS , MVT::f32, Expand);
365   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
366   setOperationAction(ISD::FREM , MVT::f32, Expand);
367   setOperationAction(ISD::FPOW , MVT::f32, Expand);
368   if (Subtarget.hasSPE()) {
369     setOperationAction(ISD::FMA  , MVT::f64, Expand);
370     setOperationAction(ISD::FMA  , MVT::f32, Expand);
371   } else {
372     setOperationAction(ISD::FMA  , MVT::f64, Legal);
373     setOperationAction(ISD::FMA  , MVT::f32, Legal);
374   }
375 
376   if (Subtarget.hasSPE())
377     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
378 
379   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
380 
381   // If we're enabling GP optimizations, use hardware square root
382   if (!Subtarget.hasFSQRT() &&
383       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
384         Subtarget.hasFRE()))
385     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
386 
387   if (!Subtarget.hasFSQRT() &&
388       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
389         Subtarget.hasFRES()))
390     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
391 
392   if (Subtarget.hasFCPSGN()) {
393     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
394     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
395   } else {
396     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
397     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
398   }
399 
400   if (Subtarget.hasFPRND()) {
401     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
402     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
403     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
404     setOperationAction(ISD::FROUND, MVT::f64, Legal);
405 
406     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
407     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
408     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
409     setOperationAction(ISD::FROUND, MVT::f32, Legal);
410   }
411 
412   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
413   // to speed up scalar BSWAP64.
414   // CTPOP or CTTZ were introduced in P8/P9 respectively
415   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
416   if (Subtarget.hasP9Vector())
417     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
418   else
419     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
420   if (Subtarget.isISA3_0()) {
421     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
422     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
423   } else {
424     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
425     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
426   }
427 
428   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
429     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
430     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
431   } else {
432     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
433     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
434   }
435 
436   // PowerPC does not have ROTR
437   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
438   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
439 
440   if (!Subtarget.useCRBits()) {
441     // PowerPC does not have Select
442     setOperationAction(ISD::SELECT, MVT::i32, Expand);
443     setOperationAction(ISD::SELECT, MVT::i64, Expand);
444     setOperationAction(ISD::SELECT, MVT::f32, Expand);
445     setOperationAction(ISD::SELECT, MVT::f64, Expand);
446   }
447 
448   // PowerPC wants to turn select_cc of FP into fsel when possible.
449   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
450   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
451 
452   // PowerPC wants to optimize integer setcc a bit
453   if (!Subtarget.useCRBits())
454     setOperationAction(ISD::SETCC, MVT::i32, Custom);
455 
456   if (Subtarget.hasFPU()) {
457     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
458     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
459     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
460 
461     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
462     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
463     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
464   }
465 
466   // PowerPC does not have BRCOND which requires SetCC
467   if (!Subtarget.useCRBits())
468     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
469 
470   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
471 
472   if (Subtarget.hasSPE()) {
473     // SPE has built-in conversions
474     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
475     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
476     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
477     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
478     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
479     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
480   } else {
481     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
482     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
483     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
484 
485     // PowerPC does not have [U|S]INT_TO_FP
486     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
487     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
488     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
489     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
490   }
491 
492   if (Subtarget.hasDirectMove() && isPPC64) {
493     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
494     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
495     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
496     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
497     if (TM.Options.UnsafeFPMath) {
498       setOperationAction(ISD::LRINT, MVT::f64, Legal);
499       setOperationAction(ISD::LRINT, MVT::f32, Legal);
500       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
501       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
502       setOperationAction(ISD::LROUND, MVT::f64, Legal);
503       setOperationAction(ISD::LROUND, MVT::f32, Legal);
504       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
505       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
506     }
507   } else {
508     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
509     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
510     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
511     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
512   }
513 
514   // We cannot sextinreg(i1).  Expand to shifts.
515   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
516 
517   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
518   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
519   // support continuation, user-level threading, and etc.. As a result, no
520   // other SjLj exception interfaces are implemented and please don't build
521   // your own exception handling based on them.
522   // LLVM/Clang supports zero-cost DWARF exception handling.
523   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
524   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
525 
526   // We want to legalize GlobalAddress and ConstantPool nodes into the
527   // appropriate instructions to materialize the address.
528   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
529   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
530   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
531   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
532   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
533   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
534   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
535   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
536   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
537   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
538 
539   // TRAP is legal.
540   setOperationAction(ISD::TRAP, MVT::Other, Legal);
541 
542   // TRAMPOLINE is custom lowered.
543   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
544   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
545 
546   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
547   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
548 
549   if (Subtarget.is64BitELFABI()) {
550     // VAARG always uses double-word chunks, so promote anything smaller.
551     setOperationAction(ISD::VAARG, MVT::i1, Promote);
552     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
553     setOperationAction(ISD::VAARG, MVT::i8, Promote);
554     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
555     setOperationAction(ISD::VAARG, MVT::i16, Promote);
556     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
557     setOperationAction(ISD::VAARG, MVT::i32, Promote);
558     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
559     setOperationAction(ISD::VAARG, MVT::Other, Expand);
560   } else if (Subtarget.is32BitELFABI()) {
561     // VAARG is custom lowered with the 32-bit SVR4 ABI.
562     setOperationAction(ISD::VAARG, MVT::Other, Custom);
563     setOperationAction(ISD::VAARG, MVT::i64, Custom);
564   } else
565     setOperationAction(ISD::VAARG, MVT::Other, Expand);
566 
567   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
568   if (Subtarget.is32BitELFABI())
569     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
570   else
571     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
572 
573   // Use the default implementation.
574   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
575   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
576   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
577   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
578   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
579   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
580   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
581   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
582   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
583 
584   // We want to custom lower some of our intrinsics.
585   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
586 
587   // To handle counter-based loop conditions.
588   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
589 
590   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
591   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
592   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
593   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
594 
595   // Comparisons that require checking two conditions.
596   if (Subtarget.hasSPE()) {
597     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
598     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
599     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
600     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
601   }
602   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
603   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
604   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
605   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
606   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
607   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
608   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
609   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
610   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
611   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
612   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
613   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
614 
615   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
616   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
617 
618   if (Subtarget.has64BitSupport()) {
619     // They also have instructions for converting between i64 and fp.
620     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
621     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
622     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
623     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
624     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
625     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
626     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
627     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
628     // This is just the low 32 bits of a (signed) fp->i64 conversion.
629     // We cannot do this with Promote because i64 is not a legal type.
630     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
631     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
632 
633     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
634       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
635       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
636     }
637   } else {
638     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
639     if (Subtarget.hasSPE()) {
640       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
641       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
642     } else {
643       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
644       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
645     }
646   }
647 
648   // With the instructions enabled under FPCVT, we can do everything.
649   if (Subtarget.hasFPCVT()) {
650     if (Subtarget.has64BitSupport()) {
651       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
652       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
653       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
654       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
655       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
656       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
657       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
658       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
659     }
660 
661     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
662     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
663     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
664     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
665     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
666     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
667     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
668     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
669   }
670 
671   if (Subtarget.use64BitRegs()) {
672     // 64-bit PowerPC implementations can support i64 types directly
673     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
674     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
675     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
676     // 64-bit PowerPC wants to expand i128 shifts itself.
677     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
678     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
679     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
680   } else {
681     // 32-bit PowerPC wants to expand i64 shifts itself.
682     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
683     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
684     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
685   }
686 
687   // PowerPC has better expansions for funnel shifts than the generic
688   // TargetLowering::expandFunnelShift.
689   if (Subtarget.has64BitSupport()) {
690     setOperationAction(ISD::FSHL, MVT::i64, Custom);
691     setOperationAction(ISD::FSHR, MVT::i64, Custom);
692   }
693   setOperationAction(ISD::FSHL, MVT::i32, Custom);
694   setOperationAction(ISD::FSHR, MVT::i32, Custom);
695 
696   if (Subtarget.hasVSX()) {
697     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
698     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
699     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
700     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
701   }
702 
703   if (Subtarget.hasAltivec()) {
704     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
705       setOperationAction(ISD::SADDSAT, VT, Legal);
706       setOperationAction(ISD::SSUBSAT, VT, Legal);
707       setOperationAction(ISD::UADDSAT, VT, Legal);
708       setOperationAction(ISD::USUBSAT, VT, Legal);
709     }
710     // First set operation action for all vector types to expand. Then we
711     // will selectively turn on ones that can be effectively codegen'd.
712     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
713       // add/sub are legal for all supported vector VT's.
714       setOperationAction(ISD::ADD, VT, Legal);
715       setOperationAction(ISD::SUB, VT, Legal);
716 
717       // For v2i64, these are only valid with P8Vector. This is corrected after
718       // the loop.
719       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
720         setOperationAction(ISD::SMAX, VT, Legal);
721         setOperationAction(ISD::SMIN, VT, Legal);
722         setOperationAction(ISD::UMAX, VT, Legal);
723         setOperationAction(ISD::UMIN, VT, Legal);
724       }
725       else {
726         setOperationAction(ISD::SMAX, VT, Expand);
727         setOperationAction(ISD::SMIN, VT, Expand);
728         setOperationAction(ISD::UMAX, VT, Expand);
729         setOperationAction(ISD::UMIN, VT, Expand);
730       }
731 
732       if (Subtarget.hasVSX()) {
733         setOperationAction(ISD::FMAXNUM, VT, Legal);
734         setOperationAction(ISD::FMINNUM, VT, Legal);
735       }
736 
737       // Vector instructions introduced in P8
738       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
739         setOperationAction(ISD::CTPOP, VT, Legal);
740         setOperationAction(ISD::CTLZ, VT, Legal);
741       }
742       else {
743         setOperationAction(ISD::CTPOP, VT, Expand);
744         setOperationAction(ISD::CTLZ, VT, Expand);
745       }
746 
747       // Vector instructions introduced in P9
748       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
749         setOperationAction(ISD::CTTZ, VT, Legal);
750       else
751         setOperationAction(ISD::CTTZ, VT, Expand);
752 
753       // We promote all shuffles to v16i8.
754       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
755       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
756 
757       // We promote all non-typed operations to v4i32.
758       setOperationAction(ISD::AND   , VT, Promote);
759       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
760       setOperationAction(ISD::OR    , VT, Promote);
761       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
762       setOperationAction(ISD::XOR   , VT, Promote);
763       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
764       setOperationAction(ISD::LOAD  , VT, Promote);
765       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
766       setOperationAction(ISD::SELECT, VT, Promote);
767       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
768       setOperationAction(ISD::VSELECT, VT, Legal);
769       setOperationAction(ISD::SELECT_CC, VT, Promote);
770       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
771       setOperationAction(ISD::STORE, VT, Promote);
772       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
773 
774       // No other operations are legal.
775       setOperationAction(ISD::MUL , VT, Expand);
776       setOperationAction(ISD::SDIV, VT, Expand);
777       setOperationAction(ISD::SREM, VT, Expand);
778       setOperationAction(ISD::UDIV, VT, Expand);
779       setOperationAction(ISD::UREM, VT, Expand);
780       setOperationAction(ISD::FDIV, VT, Expand);
781       setOperationAction(ISD::FREM, VT, Expand);
782       setOperationAction(ISD::FNEG, VT, Expand);
783       setOperationAction(ISD::FSQRT, VT, Expand);
784       setOperationAction(ISD::FLOG, VT, Expand);
785       setOperationAction(ISD::FLOG10, VT, Expand);
786       setOperationAction(ISD::FLOG2, VT, Expand);
787       setOperationAction(ISD::FEXP, VT, Expand);
788       setOperationAction(ISD::FEXP2, VT, Expand);
789       setOperationAction(ISD::FSIN, VT, Expand);
790       setOperationAction(ISD::FCOS, VT, Expand);
791       setOperationAction(ISD::FABS, VT, Expand);
792       setOperationAction(ISD::FFLOOR, VT, Expand);
793       setOperationAction(ISD::FCEIL,  VT, Expand);
794       setOperationAction(ISD::FTRUNC, VT, Expand);
795       setOperationAction(ISD::FRINT,  VT, Expand);
796       setOperationAction(ISD::FNEARBYINT, VT, Expand);
797       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
798       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
799       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
800       setOperationAction(ISD::MULHU, VT, Expand);
801       setOperationAction(ISD::MULHS, VT, Expand);
802       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
803       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
804       setOperationAction(ISD::UDIVREM, VT, Expand);
805       setOperationAction(ISD::SDIVREM, VT, Expand);
806       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
807       setOperationAction(ISD::FPOW, VT, Expand);
808       setOperationAction(ISD::BSWAP, VT, Expand);
809       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
810       setOperationAction(ISD::ROTL, VT, Expand);
811       setOperationAction(ISD::ROTR, VT, Expand);
812 
813       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
814         setTruncStoreAction(VT, InnerVT, Expand);
815         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
816         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
817         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
818       }
819     }
820     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
821     if (!Subtarget.hasP8Vector()) {
822       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
823       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
824       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
825       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
826     }
827 
828     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
829     // with merges, splats, etc.
830     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
831 
832     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
833     // are cheap, so handle them before they get expanded to scalar.
834     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
835     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
836     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
837     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
838     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
839 
840     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
841     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
842     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
843     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
844     setOperationAction(ISD::SELECT, MVT::v4i32,
845                        Subtarget.useCRBits() ? Legal : Expand);
846     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
847     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
848     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
849     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
850     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
851     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
852     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
853     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
854     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
855     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
856     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
857     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
858     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
859 
860     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
861     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
862     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
863     if (Subtarget.hasAltivec())
864       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
865         setOperationAction(ISD::ROTL, VT, Legal);
866     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
867     if (Subtarget.hasP8Altivec())
868       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
869 
870     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
871     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
872     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
873     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
874 
875     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
876     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
877 
878     if (Subtarget.hasVSX()) {
879       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
880       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
881     }
882 
883     if (Subtarget.hasP8Altivec())
884       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
885     else
886       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
887 
888     if (Subtarget.isISA3_1()) {
889       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
890       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
891       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
892       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
893       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
894       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
895       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
896       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
897       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
898       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
899       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
900       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
901       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
902       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
903       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
904       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
905       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
906       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
907     }
908 
909     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
910     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
911 
912     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
913     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
914 
915     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
916     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
917     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
918     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
919 
920     // Altivec does not contain unordered floating-point compare instructions
921     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
922     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
923     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
924     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
925 
926     if (Subtarget.hasVSX()) {
927       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
928       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
929       if (Subtarget.hasP8Vector()) {
930         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
931         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
932       }
933       if (Subtarget.hasDirectMove() && isPPC64) {
934         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
935         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
936         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
937         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
938         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
939         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
940         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
941         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
942       }
943       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
944 
945       // The nearbyint variants are not allowed to raise the inexact exception
946       // so we can only code-gen them with unsafe math.
947       if (TM.Options.UnsafeFPMath) {
948         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
949         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
950       }
951 
952       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
953       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
954       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
955       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
956       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
957       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
958       setOperationAction(ISD::FROUND, MVT::f64, Legal);
959       setOperationAction(ISD::FRINT, MVT::f64, Legal);
960 
961       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
962       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
963       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
964       setOperationAction(ISD::FROUND, MVT::f32, Legal);
965       setOperationAction(ISD::FRINT, MVT::f32, Legal);
966 
967       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
968       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
969 
970       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
971       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
972 
973       // Share the Altivec comparison restrictions.
974       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
975       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
976       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
977       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
978 
979       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
980       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
981 
982       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
983 
984       if (Subtarget.hasP8Vector())
985         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
986 
987       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
988 
989       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
990       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
991       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
992 
993       if (Subtarget.hasP8Altivec()) {
994         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
995         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
996         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
997 
998         // 128 bit shifts can be accomplished via 3 instructions for SHL and
999         // SRL, but not for SRA because of the instructions available:
1000         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1001         // doing
1002         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1003         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1004         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1005 
1006         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1007       }
1008       else {
1009         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1010         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1011         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1012 
1013         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1014 
1015         // VSX v2i64 only supports non-arithmetic operations.
1016         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1017         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1018       }
1019 
1020       if (Subtarget.isISA3_1())
1021         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1022       else
1023         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1024 
1025       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1026       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1027       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1028       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1029 
1030       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1031 
1032       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1033       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1034       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1035       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1036       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1037       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1038       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1039       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1040 
1041       // Custom handling for partial vectors of integers converted to
1042       // floating point. We already have optimal handling for v2i32 through
1043       // the DAG combine, so those aren't necessary.
1044       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1045       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1046       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1047       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1048       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1049       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1050       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1051       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1052       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1053       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1054       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1055       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1056       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1057       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1058       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1059       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1060 
1061       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1062       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1063       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1064       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1065       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1066       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1067 
1068       if (Subtarget.hasDirectMove())
1069         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1070       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1071 
1072       // Handle constrained floating-point operations of vector.
1073       // The predictor is `hasVSX` because altivec instruction has
1074       // no exception but VSX vector instruction has.
1075       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1076       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1077       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1078       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1079       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1080       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1081       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1082       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1083       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1084       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1085       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1086       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1087       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1088 
1089       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1090       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1091       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1092       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1093       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1094       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1095       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1096       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1097       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1098       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1099       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1100       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1101       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1102 
1103       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1104     }
1105 
1106     if (Subtarget.hasP8Altivec()) {
1107       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1108       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1109     }
1110 
1111     if (Subtarget.hasP9Vector()) {
1112       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1113       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1114 
1115       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1116       // SRL, but not for SRA because of the instructions available:
1117       // VS{RL} and VS{RL}O.
1118       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1119       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1120       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1121 
1122       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1123       setOperationAction(ISD::FADD, MVT::f128, Legal);
1124       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1125       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1126       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1127       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1128       // No extending loads to f128 on PPC.
1129       for (MVT FPT : MVT::fp_valuetypes())
1130         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1131       setOperationAction(ISD::FMA, MVT::f128, Legal);
1132       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1133       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1134       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1135       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1136       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1137       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1138 
1139       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1140       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1141       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1142       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1143       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1144       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1145 
1146       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1147       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1148       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1149       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1150       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1151       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1152       // No implementation for these ops for PowerPC.
1153       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1154       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1155       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1156       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1157       setOperationAction(ISD::FREM, MVT::f128, Expand);
1158 
1159       // Handle constrained floating-point operations of fp128
1160       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1161       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1162       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1163       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1164       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1165       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1166       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1167       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1168       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1169       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1170       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1171       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1172       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1173       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1174       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1175       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1176       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1177       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1178       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1179       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1180     } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
1181       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1182 
1183       for (MVT FPT : MVT::fp_valuetypes())
1184         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1185 
1186       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1187       setOperationAction(ISD::STORE, MVT::f128, Promote);
1188 
1189       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1190       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1191 
1192       // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1193       // fp_to_uint and int_to_fp.
1194       setOperationAction(ISD::FADD, MVT::f128, LibCall);
1195       setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1196 
1197       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1198       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1199       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1200       setOperationAction(ISD::FABS, MVT::f128, Expand);
1201       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1202       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1203       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1204       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1205       setOperationAction(ISD::FREM, MVT::f128, Expand);
1206       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1207       setOperationAction(ISD::FMA, MVT::f128, Expand);
1208       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1209 
1210       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1211       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1212 
1213       // Expand the fp_extend if the target type is fp128.
1214       setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1215       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1216 
1217       // Expand the fp_round if the source type is fp128.
1218       for (MVT VT : {MVT::f32, MVT::f64}) {
1219         setOperationAction(ISD::FP_ROUND, VT, Custom);
1220         setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1221       }
1222     }
1223 
1224     if (Subtarget.hasP9Altivec()) {
1225       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1226       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1227 
1228       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1229       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1230       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1231       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1232       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1233       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1234       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1235     }
1236 
1237     if (Subtarget.isISA3_1()) {
1238       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
1239       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
1240     }
1241   }
1242 
1243   if (Subtarget.pairedVectorMemops()) {
1244     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1245     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1246     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1247   }
1248   if (Subtarget.hasMMA()) {
1249     addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1250     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1251     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1252     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1253   }
1254 
1255   if (Subtarget.has64BitSupport())
1256     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1257 
1258   if (Subtarget.isISA3_1())
1259     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1260 
1261   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1262 
1263   if (!isPPC64) {
1264     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1265     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1266   }
1267 
1268   setBooleanContents(ZeroOrOneBooleanContent);
1269 
1270   if (Subtarget.hasAltivec()) {
1271     // Altivec instructions set fields to all zeros or all ones.
1272     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1273   }
1274 
1275   if (!isPPC64) {
1276     // These libcalls are not available in 32-bit.
1277     setLibcallName(RTLIB::SHL_I128, nullptr);
1278     setLibcallName(RTLIB::SRL_I128, nullptr);
1279     setLibcallName(RTLIB::SRA_I128, nullptr);
1280   }
1281 
1282   if (!isPPC64)
1283     setMaxAtomicSizeInBitsSupported(32);
1284 
1285   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1286 
1287   // We have target-specific dag combine patterns for the following nodes:
1288   setTargetDAGCombine(ISD::ADD);
1289   setTargetDAGCombine(ISD::SHL);
1290   setTargetDAGCombine(ISD::SRA);
1291   setTargetDAGCombine(ISD::SRL);
1292   setTargetDAGCombine(ISD::MUL);
1293   setTargetDAGCombine(ISD::FMA);
1294   setTargetDAGCombine(ISD::SINT_TO_FP);
1295   setTargetDAGCombine(ISD::BUILD_VECTOR);
1296   if (Subtarget.hasFPCVT())
1297     setTargetDAGCombine(ISD::UINT_TO_FP);
1298   setTargetDAGCombine(ISD::LOAD);
1299   setTargetDAGCombine(ISD::STORE);
1300   setTargetDAGCombine(ISD::BR_CC);
1301   if (Subtarget.useCRBits())
1302     setTargetDAGCombine(ISD::BRCOND);
1303   setTargetDAGCombine(ISD::BSWAP);
1304   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1305   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1306   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1307 
1308   setTargetDAGCombine(ISD::SIGN_EXTEND);
1309   setTargetDAGCombine(ISD::ZERO_EXTEND);
1310   setTargetDAGCombine(ISD::ANY_EXTEND);
1311 
1312   setTargetDAGCombine(ISD::TRUNCATE);
1313   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1314 
1315 
1316   if (Subtarget.useCRBits()) {
1317     setTargetDAGCombine(ISD::TRUNCATE);
1318     setTargetDAGCombine(ISD::SETCC);
1319     setTargetDAGCombine(ISD::SELECT_CC);
1320   }
1321 
1322   if (Subtarget.hasP9Altivec()) {
1323     setTargetDAGCombine(ISD::ABS);
1324     setTargetDAGCombine(ISD::VSELECT);
1325   }
1326 
1327   setLibcallName(RTLIB::LOG_F128, "logf128");
1328   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1329   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1330   setLibcallName(RTLIB::EXP_F128, "expf128");
1331   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1332   setLibcallName(RTLIB::SIN_F128, "sinf128");
1333   setLibcallName(RTLIB::COS_F128, "cosf128");
1334   setLibcallName(RTLIB::POW_F128, "powf128");
1335   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1336   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1337   setLibcallName(RTLIB::REM_F128, "fmodf128");
1338   setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1339   setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1340   setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1341   setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1342   setLibcallName(RTLIB::ROUND_F128, "roundf128");
1343   setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1344   setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1345   setLibcallName(RTLIB::RINT_F128, "rintf128");
1346   setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1347   setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1348   setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1349   setLibcallName(RTLIB::FMA_F128, "fmaf128");
1350 
1351   // With 32 condition bits, we don't need to sink (and duplicate) compares
1352   // aggressively in CodeGenPrep.
1353   if (Subtarget.useCRBits()) {
1354     setHasMultipleConditionRegisters();
1355     setJumpIsExpensive();
1356   }
1357 
1358   setMinFunctionAlignment(Align(4));
1359 
1360   switch (Subtarget.getCPUDirective()) {
1361   default: break;
1362   case PPC::DIR_970:
1363   case PPC::DIR_A2:
1364   case PPC::DIR_E500:
1365   case PPC::DIR_E500mc:
1366   case PPC::DIR_E5500:
1367   case PPC::DIR_PWR4:
1368   case PPC::DIR_PWR5:
1369   case PPC::DIR_PWR5X:
1370   case PPC::DIR_PWR6:
1371   case PPC::DIR_PWR6X:
1372   case PPC::DIR_PWR7:
1373   case PPC::DIR_PWR8:
1374   case PPC::DIR_PWR9:
1375   case PPC::DIR_PWR10:
1376   case PPC::DIR_PWR_FUTURE:
1377     setPrefLoopAlignment(Align(16));
1378     setPrefFunctionAlignment(Align(16));
1379     break;
1380   }
1381 
1382   if (Subtarget.enableMachineScheduler())
1383     setSchedulingPreference(Sched::Source);
1384   else
1385     setSchedulingPreference(Sched::Hybrid);
1386 
1387   computeRegisterProperties(STI.getRegisterInfo());
1388 
1389   // The Freescale cores do better with aggressive inlining of memcpy and
1390   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1391   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1392       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1393     MaxStoresPerMemset = 32;
1394     MaxStoresPerMemsetOptSize = 16;
1395     MaxStoresPerMemcpy = 32;
1396     MaxStoresPerMemcpyOptSize = 8;
1397     MaxStoresPerMemmove = 32;
1398     MaxStoresPerMemmoveOptSize = 8;
1399   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1400     // The A2 also benefits from (very) aggressive inlining of memcpy and
1401     // friends. The overhead of a the function call, even when warm, can be
1402     // over one hundred cycles.
1403     MaxStoresPerMemset = 128;
1404     MaxStoresPerMemcpy = 128;
1405     MaxStoresPerMemmove = 128;
1406     MaxLoadsPerMemcmp = 128;
1407   } else {
1408     MaxLoadsPerMemcmp = 8;
1409     MaxLoadsPerMemcmpOptSize = 4;
1410   }
1411 
1412   IsStrictFPEnabled = true;
1413 
1414   // Let the subtarget (CPU) decide if a predictable select is more expensive
1415   // than the corresponding branch. This information is used in CGP to decide
1416   // when to convert selects into branches.
1417   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1418 }
1419 
1420 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1421 /// the desired ByVal argument alignment.
1422 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1423   if (MaxAlign == MaxMaxAlign)
1424     return;
1425   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1426     if (MaxMaxAlign >= 32 &&
1427         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1428       MaxAlign = Align(32);
1429     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1430              MaxAlign < 16)
1431       MaxAlign = Align(16);
1432   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1433     Align EltAlign;
1434     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1435     if (EltAlign > MaxAlign)
1436       MaxAlign = EltAlign;
1437   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1438     for (auto *EltTy : STy->elements()) {
1439       Align EltAlign;
1440       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1441       if (EltAlign > MaxAlign)
1442         MaxAlign = EltAlign;
1443       if (MaxAlign == MaxMaxAlign)
1444         break;
1445     }
1446   }
1447 }
1448 
1449 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1450 /// function arguments in the caller parameter area.
1451 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1452                                                   const DataLayout &DL) const {
1453   // 16byte and wider vectors are passed on 16byte boundary.
1454   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1455   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1456   if (Subtarget.hasAltivec())
1457     getMaxByValAlign(Ty, Alignment, Align(16));
1458   return Alignment.value();
1459 }
1460 
1461 bool PPCTargetLowering::useSoftFloat() const {
1462   return Subtarget.useSoftFloat();
1463 }
1464 
1465 bool PPCTargetLowering::hasSPE() const {
1466   return Subtarget.hasSPE();
1467 }
1468 
1469 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1470   return VT.isScalarInteger();
1471 }
1472 
1473 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1474   switch ((PPCISD::NodeType)Opcode) {
1475   case PPCISD::FIRST_NUMBER:    break;
1476   case PPCISD::FSEL:            return "PPCISD::FSEL";
1477   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1478   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1479   case PPCISD::FCFID:           return "PPCISD::FCFID";
1480   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1481   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1482   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1483   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1484   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1485   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1486   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1487   case PPCISD::FP_TO_UINT_IN_VSR:
1488                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1489   case PPCISD::FP_TO_SINT_IN_VSR:
1490                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1491   case PPCISD::FRE:             return "PPCISD::FRE";
1492   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1493   case PPCISD::FTSQRT:
1494     return "PPCISD::FTSQRT";
1495   case PPCISD::FSQRT:
1496     return "PPCISD::FSQRT";
1497   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1498   case PPCISD::VPERM:           return "PPCISD::VPERM";
1499   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1500   case PPCISD::XXSPLTI_SP_TO_DP:
1501     return "PPCISD::XXSPLTI_SP_TO_DP";
1502   case PPCISD::XXSPLTI32DX:
1503     return "PPCISD::XXSPLTI32DX";
1504   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1505   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1506   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1507   case PPCISD::CMPB:            return "PPCISD::CMPB";
1508   case PPCISD::Hi:              return "PPCISD::Hi";
1509   case PPCISD::Lo:              return "PPCISD::Lo";
1510   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1511   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1512   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1513   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1514   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1515   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1516   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1517   case PPCISD::SRL:             return "PPCISD::SRL";
1518   case PPCISD::SRA:             return "PPCISD::SRA";
1519   case PPCISD::SHL:             return "PPCISD::SHL";
1520   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1521   case PPCISD::CALL:            return "PPCISD::CALL";
1522   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1523   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1524   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1525   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1526   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1527   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1528   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1529   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1530   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1531   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1532   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1533   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1534   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1535   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1536   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1537   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1538     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1539   case PPCISD::ANDI_rec_1_EQ_BIT:
1540     return "PPCISD::ANDI_rec_1_EQ_BIT";
1541   case PPCISD::ANDI_rec_1_GT_BIT:
1542     return "PPCISD::ANDI_rec_1_GT_BIT";
1543   case PPCISD::VCMP:            return "PPCISD::VCMP";
1544   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1545   case PPCISD::LBRX:            return "PPCISD::LBRX";
1546   case PPCISD::STBRX:           return "PPCISD::STBRX";
1547   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1548   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1549   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1550   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1551   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1552   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1553   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1554   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1555   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1556   case PPCISD::ST_VSR_SCAL_INT:
1557                                 return "PPCISD::ST_VSR_SCAL_INT";
1558   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1559   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1560   case PPCISD::BDZ:             return "PPCISD::BDZ";
1561   case PPCISD::MFFS:            return "PPCISD::MFFS";
1562   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1563   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1564   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1565   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1566   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1567   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1568   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1569   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1570   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1571   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1572   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1573   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1574   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1575   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1576   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1577   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1578   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1579   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1580   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1581   case PPCISD::PADDI_DTPREL:
1582     return "PPCISD::PADDI_DTPREL";
1583   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1584   case PPCISD::SC:              return "PPCISD::SC";
1585   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1586   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1587   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1588   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1589   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1590   case PPCISD::VABSD:           return "PPCISD::VABSD";
1591   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1592   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1593   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1594   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1595   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1596   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1597   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1598   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1599     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1600   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1601     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1602   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1603   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1604   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1605   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1606   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1607   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1608   case PPCISD::STRICT_FADDRTZ:
1609     return "PPCISD::STRICT_FADDRTZ";
1610   case PPCISD::STRICT_FCTIDZ:
1611     return "PPCISD::STRICT_FCTIDZ";
1612   case PPCISD::STRICT_FCTIWZ:
1613     return "PPCISD::STRICT_FCTIWZ";
1614   case PPCISD::STRICT_FCTIDUZ:
1615     return "PPCISD::STRICT_FCTIDUZ";
1616   case PPCISD::STRICT_FCTIWUZ:
1617     return "PPCISD::STRICT_FCTIWUZ";
1618   case PPCISD::STRICT_FCFID:
1619     return "PPCISD::STRICT_FCFID";
1620   case PPCISD::STRICT_FCFIDU:
1621     return "PPCISD::STRICT_FCFIDU";
1622   case PPCISD::STRICT_FCFIDS:
1623     return "PPCISD::STRICT_FCFIDS";
1624   case PPCISD::STRICT_FCFIDUS:
1625     return "PPCISD::STRICT_FCFIDUS";
1626   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1627   }
1628   return nullptr;
1629 }
1630 
1631 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1632                                           EVT VT) const {
1633   if (!VT.isVector())
1634     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1635 
1636   return VT.changeVectorElementTypeToInteger();
1637 }
1638 
1639 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1640   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1641   return true;
1642 }
1643 
1644 //===----------------------------------------------------------------------===//
1645 // Node matching predicates, for use by the tblgen matching code.
1646 //===----------------------------------------------------------------------===//
1647 
1648 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1649 static bool isFloatingPointZero(SDValue Op) {
1650   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1651     return CFP->getValueAPF().isZero();
1652   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1653     // Maybe this has already been legalized into the constant pool?
1654     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1655       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1656         return CFP->getValueAPF().isZero();
1657   }
1658   return false;
1659 }
1660 
1661 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1662 /// true if Op is undef or if it matches the specified value.
1663 static bool isConstantOrUndef(int Op, int Val) {
1664   return Op < 0 || Op == Val;
1665 }
1666 
1667 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1668 /// VPKUHUM instruction.
1669 /// The ShuffleKind distinguishes between big-endian operations with
1670 /// two different inputs (0), either-endian operations with two identical
1671 /// inputs (1), and little-endian operations with two different inputs (2).
1672 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1673 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1674                                SelectionDAG &DAG) {
1675   bool IsLE = DAG.getDataLayout().isLittleEndian();
1676   if (ShuffleKind == 0) {
1677     if (IsLE)
1678       return false;
1679     for (unsigned i = 0; i != 16; ++i)
1680       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1681         return false;
1682   } else if (ShuffleKind == 2) {
1683     if (!IsLE)
1684       return false;
1685     for (unsigned i = 0; i != 16; ++i)
1686       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1687         return false;
1688   } else if (ShuffleKind == 1) {
1689     unsigned j = IsLE ? 0 : 1;
1690     for (unsigned i = 0; i != 8; ++i)
1691       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1692           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1693         return false;
1694   }
1695   return true;
1696 }
1697 
1698 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1699 /// VPKUWUM instruction.
1700 /// The ShuffleKind distinguishes between big-endian operations with
1701 /// two different inputs (0), either-endian operations with two identical
1702 /// inputs (1), and little-endian operations with two different inputs (2).
1703 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1704 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1705                                SelectionDAG &DAG) {
1706   bool IsLE = DAG.getDataLayout().isLittleEndian();
1707   if (ShuffleKind == 0) {
1708     if (IsLE)
1709       return false;
1710     for (unsigned i = 0; i != 16; i += 2)
1711       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1712           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1713         return false;
1714   } else if (ShuffleKind == 2) {
1715     if (!IsLE)
1716       return false;
1717     for (unsigned i = 0; i != 16; i += 2)
1718       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1719           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1720         return false;
1721   } else if (ShuffleKind == 1) {
1722     unsigned j = IsLE ? 0 : 2;
1723     for (unsigned i = 0; i != 8; i += 2)
1724       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1725           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1726           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1727           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1728         return false;
1729   }
1730   return true;
1731 }
1732 
1733 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1734 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1735 /// current subtarget.
1736 ///
1737 /// The ShuffleKind distinguishes between big-endian operations with
1738 /// two different inputs (0), either-endian operations with two identical
1739 /// inputs (1), and little-endian operations with two different inputs (2).
1740 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1741 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1742                                SelectionDAG &DAG) {
1743   const PPCSubtarget& Subtarget =
1744       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1745   if (!Subtarget.hasP8Vector())
1746     return false;
1747 
1748   bool IsLE = DAG.getDataLayout().isLittleEndian();
1749   if (ShuffleKind == 0) {
1750     if (IsLE)
1751       return false;
1752     for (unsigned i = 0; i != 16; i += 4)
1753       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1754           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1755           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1756           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1757         return false;
1758   } else if (ShuffleKind == 2) {
1759     if (!IsLE)
1760       return false;
1761     for (unsigned i = 0; i != 16; i += 4)
1762       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1763           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1764           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1765           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1766         return false;
1767   } else if (ShuffleKind == 1) {
1768     unsigned j = IsLE ? 0 : 4;
1769     for (unsigned i = 0; i != 8; i += 4)
1770       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1771           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1772           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1773           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1774           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1775           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1776           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1777           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1778         return false;
1779   }
1780   return true;
1781 }
1782 
1783 /// isVMerge - Common function, used to match vmrg* shuffles.
1784 ///
1785 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1786                      unsigned LHSStart, unsigned RHSStart) {
1787   if (N->getValueType(0) != MVT::v16i8)
1788     return false;
1789   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1790          "Unsupported merge size!");
1791 
1792   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1793     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1794       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1795                              LHSStart+j+i*UnitSize) ||
1796           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1797                              RHSStart+j+i*UnitSize))
1798         return false;
1799     }
1800   return true;
1801 }
1802 
1803 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1804 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1805 /// The ShuffleKind distinguishes between big-endian merges with two
1806 /// different inputs (0), either-endian merges with two identical inputs (1),
1807 /// and little-endian merges with two different inputs (2).  For the latter,
1808 /// the input operands are swapped (see PPCInstrAltivec.td).
1809 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1810                              unsigned ShuffleKind, SelectionDAG &DAG) {
1811   if (DAG.getDataLayout().isLittleEndian()) {
1812     if (ShuffleKind == 1) // unary
1813       return isVMerge(N, UnitSize, 0, 0);
1814     else if (ShuffleKind == 2) // swapped
1815       return isVMerge(N, UnitSize, 0, 16);
1816     else
1817       return false;
1818   } else {
1819     if (ShuffleKind == 1) // unary
1820       return isVMerge(N, UnitSize, 8, 8);
1821     else if (ShuffleKind == 0) // normal
1822       return isVMerge(N, UnitSize, 8, 24);
1823     else
1824       return false;
1825   }
1826 }
1827 
1828 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1829 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1830 /// The ShuffleKind distinguishes between big-endian merges with two
1831 /// different inputs (0), either-endian merges with two identical inputs (1),
1832 /// and little-endian merges with two different inputs (2).  For the latter,
1833 /// the input operands are swapped (see PPCInstrAltivec.td).
1834 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1835                              unsigned ShuffleKind, SelectionDAG &DAG) {
1836   if (DAG.getDataLayout().isLittleEndian()) {
1837     if (ShuffleKind == 1) // unary
1838       return isVMerge(N, UnitSize, 8, 8);
1839     else if (ShuffleKind == 2) // swapped
1840       return isVMerge(N, UnitSize, 8, 24);
1841     else
1842       return false;
1843   } else {
1844     if (ShuffleKind == 1) // unary
1845       return isVMerge(N, UnitSize, 0, 0);
1846     else if (ShuffleKind == 0) // normal
1847       return isVMerge(N, UnitSize, 0, 16);
1848     else
1849       return false;
1850   }
1851 }
1852 
1853 /**
1854  * Common function used to match vmrgew and vmrgow shuffles
1855  *
1856  * The indexOffset determines whether to look for even or odd words in
1857  * the shuffle mask. This is based on the of the endianness of the target
1858  * machine.
1859  *   - Little Endian:
1860  *     - Use offset of 0 to check for odd elements
1861  *     - Use offset of 4 to check for even elements
1862  *   - Big Endian:
1863  *     - Use offset of 0 to check for even elements
1864  *     - Use offset of 4 to check for odd elements
1865  * A detailed description of the vector element ordering for little endian and
1866  * big endian can be found at
1867  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1868  * Targeting your applications - what little endian and big endian IBM XL C/C++
1869  * compiler differences mean to you
1870  *
1871  * The mask to the shuffle vector instruction specifies the indices of the
1872  * elements from the two input vectors to place in the result. The elements are
1873  * numbered in array-access order, starting with the first vector. These vectors
1874  * are always of type v16i8, thus each vector will contain 16 elements of size
1875  * 8. More info on the shuffle vector can be found in the
1876  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1877  * Language Reference.
1878  *
1879  * The RHSStartValue indicates whether the same input vectors are used (unary)
1880  * or two different input vectors are used, based on the following:
1881  *   - If the instruction uses the same vector for both inputs, the range of the
1882  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1883  *     be 0.
1884  *   - If the instruction has two different vectors then the range of the
1885  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1886  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1887  *     to 31 specify elements in the second vector).
1888  *
1889  * \param[in] N The shuffle vector SD Node to analyze
1890  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1891  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1892  * vector to the shuffle_vector instruction
1893  * \return true iff this shuffle vector represents an even or odd word merge
1894  */
1895 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1896                      unsigned RHSStartValue) {
1897   if (N->getValueType(0) != MVT::v16i8)
1898     return false;
1899 
1900   for (unsigned i = 0; i < 2; ++i)
1901     for (unsigned j = 0; j < 4; ++j)
1902       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1903                              i*RHSStartValue+j+IndexOffset) ||
1904           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1905                              i*RHSStartValue+j+IndexOffset+8))
1906         return false;
1907   return true;
1908 }
1909 
1910 /**
1911  * Determine if the specified shuffle mask is suitable for the vmrgew or
1912  * vmrgow instructions.
1913  *
1914  * \param[in] N The shuffle vector SD Node to analyze
1915  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1916  * \param[in] ShuffleKind Identify the type of merge:
1917  *   - 0 = big-endian merge with two different inputs;
1918  *   - 1 = either-endian merge with two identical inputs;
1919  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1920  *     little-endian merges).
1921  * \param[in] DAG The current SelectionDAG
1922  * \return true iff this shuffle mask
1923  */
1924 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1925                               unsigned ShuffleKind, SelectionDAG &DAG) {
1926   if (DAG.getDataLayout().isLittleEndian()) {
1927     unsigned indexOffset = CheckEven ? 4 : 0;
1928     if (ShuffleKind == 1) // Unary
1929       return isVMerge(N, indexOffset, 0);
1930     else if (ShuffleKind == 2) // swapped
1931       return isVMerge(N, indexOffset, 16);
1932     else
1933       return false;
1934   }
1935   else {
1936     unsigned indexOffset = CheckEven ? 0 : 4;
1937     if (ShuffleKind == 1) // Unary
1938       return isVMerge(N, indexOffset, 0);
1939     else if (ShuffleKind == 0) // Normal
1940       return isVMerge(N, indexOffset, 16);
1941     else
1942       return false;
1943   }
1944   return false;
1945 }
1946 
1947 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1948 /// amount, otherwise return -1.
1949 /// The ShuffleKind distinguishes between big-endian operations with two
1950 /// different inputs (0), either-endian operations with two identical inputs
1951 /// (1), and little-endian operations with two different inputs (2).  For the
1952 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1953 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1954                              SelectionDAG &DAG) {
1955   if (N->getValueType(0) != MVT::v16i8)
1956     return -1;
1957 
1958   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1959 
1960   // Find the first non-undef value in the shuffle mask.
1961   unsigned i;
1962   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1963     /*search*/;
1964 
1965   if (i == 16) return -1;  // all undef.
1966 
1967   // Otherwise, check to see if the rest of the elements are consecutively
1968   // numbered from this value.
1969   unsigned ShiftAmt = SVOp->getMaskElt(i);
1970   if (ShiftAmt < i) return -1;
1971 
1972   ShiftAmt -= i;
1973   bool isLE = DAG.getDataLayout().isLittleEndian();
1974 
1975   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1976     // Check the rest of the elements to see if they are consecutive.
1977     for (++i; i != 16; ++i)
1978       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1979         return -1;
1980   } else if (ShuffleKind == 1) {
1981     // Check the rest of the elements to see if they are consecutive.
1982     for (++i; i != 16; ++i)
1983       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1984         return -1;
1985   } else
1986     return -1;
1987 
1988   if (isLE)
1989     ShiftAmt = 16 - ShiftAmt;
1990 
1991   return ShiftAmt;
1992 }
1993 
1994 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1995 /// specifies a splat of a single element that is suitable for input to
1996 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1997 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1998   assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1999          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2000 
2001   // The consecutive indices need to specify an element, not part of two
2002   // different elements.  So abandon ship early if this isn't the case.
2003   if (N->getMaskElt(0) % EltSize != 0)
2004     return false;
2005 
2006   // This is a splat operation if each element of the permute is the same, and
2007   // if the value doesn't reference the second vector.
2008   unsigned ElementBase = N->getMaskElt(0);
2009 
2010   // FIXME: Handle UNDEF elements too!
2011   if (ElementBase >= 16)
2012     return false;
2013 
2014   // Check that the indices are consecutive, in the case of a multi-byte element
2015   // splatted with a v16i8 mask.
2016   for (unsigned i = 1; i != EltSize; ++i)
2017     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2018       return false;
2019 
2020   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2021     if (N->getMaskElt(i) < 0) continue;
2022     for (unsigned j = 0; j != EltSize; ++j)
2023       if (N->getMaskElt(i+j) != N->getMaskElt(j))
2024         return false;
2025   }
2026   return true;
2027 }
2028 
2029 /// Check that the mask is shuffling N byte elements. Within each N byte
2030 /// element of the mask, the indices could be either in increasing or
2031 /// decreasing order as long as they are consecutive.
2032 /// \param[in] N the shuffle vector SD Node to analyze
2033 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2034 /// Word/DoubleWord/QuadWord).
2035 /// \param[in] StepLen the delta indices number among the N byte element, if
2036 /// the mask is in increasing/decreasing order then it is 1/-1.
2037 /// \return true iff the mask is shuffling N byte elements.
2038 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2039                                    int StepLen) {
2040   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2041          "Unexpected element width.");
2042   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2043 
2044   unsigned NumOfElem = 16 / Width;
2045   unsigned MaskVal[16]; //  Width is never greater than 16
2046   for (unsigned i = 0; i < NumOfElem; ++i) {
2047     MaskVal[0] = N->getMaskElt(i * Width);
2048     if ((StepLen == 1) && (MaskVal[0] % Width)) {
2049       return false;
2050     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2051       return false;
2052     }
2053 
2054     for (unsigned int j = 1; j < Width; ++j) {
2055       MaskVal[j] = N->getMaskElt(i * Width + j);
2056       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2057         return false;
2058       }
2059     }
2060   }
2061 
2062   return true;
2063 }
2064 
2065 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2066                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2067   if (!isNByteElemShuffleMask(N, 4, 1))
2068     return false;
2069 
2070   // Now we look at mask elements 0,4,8,12
2071   unsigned M0 = N->getMaskElt(0) / 4;
2072   unsigned M1 = N->getMaskElt(4) / 4;
2073   unsigned M2 = N->getMaskElt(8) / 4;
2074   unsigned M3 = N->getMaskElt(12) / 4;
2075   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2076   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2077 
2078   // Below, let H and L be arbitrary elements of the shuffle mask
2079   // where H is in the range [4,7] and L is in the range [0,3].
2080   // H, 1, 2, 3 or L, 5, 6, 7
2081   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2082       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2083     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2084     InsertAtByte = IsLE ? 12 : 0;
2085     Swap = M0 < 4;
2086     return true;
2087   }
2088   // 0, H, 2, 3 or 4, L, 6, 7
2089   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2090       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2091     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2092     InsertAtByte = IsLE ? 8 : 4;
2093     Swap = M1 < 4;
2094     return true;
2095   }
2096   // 0, 1, H, 3 or 4, 5, L, 7
2097   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2098       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2099     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2100     InsertAtByte = IsLE ? 4 : 8;
2101     Swap = M2 < 4;
2102     return true;
2103   }
2104   // 0, 1, 2, H or 4, 5, 6, L
2105   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2106       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2107     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2108     InsertAtByte = IsLE ? 0 : 12;
2109     Swap = M3 < 4;
2110     return true;
2111   }
2112 
2113   // If both vector operands for the shuffle are the same vector, the mask will
2114   // contain only elements from the first one and the second one will be undef.
2115   if (N->getOperand(1).isUndef()) {
2116     ShiftElts = 0;
2117     Swap = true;
2118     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2119     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2120       InsertAtByte = IsLE ? 12 : 0;
2121       return true;
2122     }
2123     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2124       InsertAtByte = IsLE ? 8 : 4;
2125       return true;
2126     }
2127     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2128       InsertAtByte = IsLE ? 4 : 8;
2129       return true;
2130     }
2131     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2132       InsertAtByte = IsLE ? 0 : 12;
2133       return true;
2134     }
2135   }
2136 
2137   return false;
2138 }
2139 
2140 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2141                                bool &Swap, bool IsLE) {
2142   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2143   // Ensure each byte index of the word is consecutive.
2144   if (!isNByteElemShuffleMask(N, 4, 1))
2145     return false;
2146 
2147   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2148   unsigned M0 = N->getMaskElt(0) / 4;
2149   unsigned M1 = N->getMaskElt(4) / 4;
2150   unsigned M2 = N->getMaskElt(8) / 4;
2151   unsigned M3 = N->getMaskElt(12) / 4;
2152 
2153   // If both vector operands for the shuffle are the same vector, the mask will
2154   // contain only elements from the first one and the second one will be undef.
2155   if (N->getOperand(1).isUndef()) {
2156     assert(M0 < 4 && "Indexing into an undef vector?");
2157     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2158       return false;
2159 
2160     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2161     Swap = false;
2162     return true;
2163   }
2164 
2165   // Ensure each word index of the ShuffleVector Mask is consecutive.
2166   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2167     return false;
2168 
2169   if (IsLE) {
2170     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2171       // Input vectors don't need to be swapped if the leading element
2172       // of the result is one of the 3 left elements of the second vector
2173       // (or if there is no shift to be done at all).
2174       Swap = false;
2175       ShiftElts = (8 - M0) % 8;
2176     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2177       // Input vectors need to be swapped if the leading element
2178       // of the result is one of the 3 left elements of the first vector
2179       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2180       Swap = true;
2181       ShiftElts = (4 - M0) % 4;
2182     }
2183 
2184     return true;
2185   } else {                                          // BE
2186     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2187       // Input vectors don't need to be swapped if the leading element
2188       // of the result is one of the 4 elements of the first vector.
2189       Swap = false;
2190       ShiftElts = M0;
2191     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2192       // Input vectors need to be swapped if the leading element
2193       // of the result is one of the 4 elements of the right vector.
2194       Swap = true;
2195       ShiftElts = M0 - 4;
2196     }
2197 
2198     return true;
2199   }
2200 }
2201 
2202 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2203   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2204 
2205   if (!isNByteElemShuffleMask(N, Width, -1))
2206     return false;
2207 
2208   for (int i = 0; i < 16; i += Width)
2209     if (N->getMaskElt(i) != i + Width - 1)
2210       return false;
2211 
2212   return true;
2213 }
2214 
2215 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2216   return isXXBRShuffleMaskHelper(N, 2);
2217 }
2218 
2219 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2220   return isXXBRShuffleMaskHelper(N, 4);
2221 }
2222 
2223 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2224   return isXXBRShuffleMaskHelper(N, 8);
2225 }
2226 
2227 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2228   return isXXBRShuffleMaskHelper(N, 16);
2229 }
2230 
2231 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2232 /// if the inputs to the instruction should be swapped and set \p DM to the
2233 /// value for the immediate.
2234 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2235 /// AND element 0 of the result comes from the first input (LE) or second input
2236 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2237 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2238 /// mask.
2239 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2240                                bool &Swap, bool IsLE) {
2241   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2242 
2243   // Ensure each byte index of the double word is consecutive.
2244   if (!isNByteElemShuffleMask(N, 8, 1))
2245     return false;
2246 
2247   unsigned M0 = N->getMaskElt(0) / 8;
2248   unsigned M1 = N->getMaskElt(8) / 8;
2249   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2250 
2251   // If both vector operands for the shuffle are the same vector, the mask will
2252   // contain only elements from the first one and the second one will be undef.
2253   if (N->getOperand(1).isUndef()) {
2254     if ((M0 | M1) < 2) {
2255       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2256       Swap = false;
2257       return true;
2258     } else
2259       return false;
2260   }
2261 
2262   if (IsLE) {
2263     if (M0 > 1 && M1 < 2) {
2264       Swap = false;
2265     } else if (M0 < 2 && M1 > 1) {
2266       M0 = (M0 + 2) % 4;
2267       M1 = (M1 + 2) % 4;
2268       Swap = true;
2269     } else
2270       return false;
2271 
2272     // Note: if control flow comes here that means Swap is already set above
2273     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2274     return true;
2275   } else { // BE
2276     if (M0 < 2 && M1 > 1) {
2277       Swap = false;
2278     } else if (M0 > 1 && M1 < 2) {
2279       M0 = (M0 + 2) % 4;
2280       M1 = (M1 + 2) % 4;
2281       Swap = true;
2282     } else
2283       return false;
2284 
2285     // Note: if control flow comes here that means Swap is already set above
2286     DM = (M0 << 1) + (M1 & 1);
2287     return true;
2288   }
2289 }
2290 
2291 
2292 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2293 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2294 /// elements are counted from the left of the vector register).
2295 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2296                                          SelectionDAG &DAG) {
2297   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2298   assert(isSplatShuffleMask(SVOp, EltSize));
2299   if (DAG.getDataLayout().isLittleEndian())
2300     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2301   else
2302     return SVOp->getMaskElt(0) / EltSize;
2303 }
2304 
2305 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2306 /// by using a vspltis[bhw] instruction of the specified element size, return
2307 /// the constant being splatted.  The ByteSize field indicates the number of
2308 /// bytes of each element [124] -> [bhw].
2309 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2310   SDValue OpVal(nullptr, 0);
2311 
2312   // If ByteSize of the splat is bigger than the element size of the
2313   // build_vector, then we have a case where we are checking for a splat where
2314   // multiple elements of the buildvector are folded together into a single
2315   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2316   unsigned EltSize = 16/N->getNumOperands();
2317   if (EltSize < ByteSize) {
2318     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2319     SDValue UniquedVals[4];
2320     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2321 
2322     // See if all of the elements in the buildvector agree across.
2323     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2324       if (N->getOperand(i).isUndef()) continue;
2325       // If the element isn't a constant, bail fully out.
2326       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2327 
2328       if (!UniquedVals[i&(Multiple-1)].getNode())
2329         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2330       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2331         return SDValue();  // no match.
2332     }
2333 
2334     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2335     // either constant or undef values that are identical for each chunk.  See
2336     // if these chunks can form into a larger vspltis*.
2337 
2338     // Check to see if all of the leading entries are either 0 or -1.  If
2339     // neither, then this won't fit into the immediate field.
2340     bool LeadingZero = true;
2341     bool LeadingOnes = true;
2342     for (unsigned i = 0; i != Multiple-1; ++i) {
2343       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2344 
2345       LeadingZero &= isNullConstant(UniquedVals[i]);
2346       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2347     }
2348     // Finally, check the least significant entry.
2349     if (LeadingZero) {
2350       if (!UniquedVals[Multiple-1].getNode())
2351         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2352       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2353       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2354         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2355     }
2356     if (LeadingOnes) {
2357       if (!UniquedVals[Multiple-1].getNode())
2358         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2359       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2360       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2361         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2362     }
2363 
2364     return SDValue();
2365   }
2366 
2367   // Check to see if this buildvec has a single non-undef value in its elements.
2368   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2369     if (N->getOperand(i).isUndef()) continue;
2370     if (!OpVal.getNode())
2371       OpVal = N->getOperand(i);
2372     else if (OpVal != N->getOperand(i))
2373       return SDValue();
2374   }
2375 
2376   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2377 
2378   unsigned ValSizeInBytes = EltSize;
2379   uint64_t Value = 0;
2380   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2381     Value = CN->getZExtValue();
2382   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2383     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2384     Value = FloatToBits(CN->getValueAPF().convertToFloat());
2385   }
2386 
2387   // If the splat value is larger than the element value, then we can never do
2388   // this splat.  The only case that we could fit the replicated bits into our
2389   // immediate field for would be zero, and we prefer to use vxor for it.
2390   if (ValSizeInBytes < ByteSize) return SDValue();
2391 
2392   // If the element value is larger than the splat value, check if it consists
2393   // of a repeated bit pattern of size ByteSize.
2394   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2395     return SDValue();
2396 
2397   // Properly sign extend the value.
2398   int MaskVal = SignExtend32(Value, ByteSize * 8);
2399 
2400   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2401   if (MaskVal == 0) return SDValue();
2402 
2403   // Finally, if this value fits in a 5 bit sext field, return it
2404   if (SignExtend32<5>(MaskVal) == MaskVal)
2405     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2406   return SDValue();
2407 }
2408 
2409 //===----------------------------------------------------------------------===//
2410 //  Addressing Mode Selection
2411 //===----------------------------------------------------------------------===//
2412 
2413 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2414 /// or 64-bit immediate, and if the value can be accurately represented as a
2415 /// sign extension from a 16-bit value.  If so, this returns true and the
2416 /// immediate.
2417 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2418   if (!isa<ConstantSDNode>(N))
2419     return false;
2420 
2421   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2422   if (N->getValueType(0) == MVT::i32)
2423     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2424   else
2425     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2426 }
2427 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2428   return isIntS16Immediate(Op.getNode(), Imm);
2429 }
2430 
2431 
2432 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2433 /// be represented as an indexed [r+r] operation.
2434 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2435                                                SDValue &Index,
2436                                                SelectionDAG &DAG) const {
2437   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2438       UI != E; ++UI) {
2439     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2440       if (Memop->getMemoryVT() == MVT::f64) {
2441           Base = N.getOperand(0);
2442           Index = N.getOperand(1);
2443           return true;
2444       }
2445     }
2446   }
2447   return false;
2448 }
2449 
2450 /// isIntS34Immediate - This method tests if value of node given can be
2451 /// accurately represented as a sign extension from a 34-bit value.  If so,
2452 /// this returns true and the immediate.
2453 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2454   if (!isa<ConstantSDNode>(N))
2455     return false;
2456 
2457   Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2458   return isInt<34>(Imm);
2459 }
2460 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2461   return isIntS34Immediate(Op.getNode(), Imm);
2462 }
2463 
2464 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2465 /// can be represented as an indexed [r+r] operation.  Returns false if it
2466 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2467 /// non-zero and N can be represented by a base register plus a signed 16-bit
2468 /// displacement, make a more precise judgement by checking (displacement % \p
2469 /// EncodingAlignment).
2470 bool PPCTargetLowering::SelectAddressRegReg(
2471     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2472     MaybeAlign EncodingAlignment) const {
2473   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2474   // a [pc+imm].
2475   if (SelectAddressPCRel(N, Base))
2476     return false;
2477 
2478   int16_t Imm = 0;
2479   if (N.getOpcode() == ISD::ADD) {
2480     // Is there any SPE load/store (f64), which can't handle 16bit offset?
2481     // SPE load/store can only handle 8-bit offsets.
2482     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2483         return true;
2484     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2485         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2486       return false; // r+i
2487     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2488       return false;    // r+i
2489 
2490     Base = N.getOperand(0);
2491     Index = N.getOperand(1);
2492     return true;
2493   } else if (N.getOpcode() == ISD::OR) {
2494     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2495         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2496       return false; // r+i can fold it if we can.
2497 
2498     // If this is an or of disjoint bitfields, we can codegen this as an add
2499     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2500     // disjoint.
2501     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2502 
2503     if (LHSKnown.Zero.getBoolValue()) {
2504       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2505       // If all of the bits are known zero on the LHS or RHS, the add won't
2506       // carry.
2507       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2508         Base = N.getOperand(0);
2509         Index = N.getOperand(1);
2510         return true;
2511       }
2512     }
2513   }
2514 
2515   return false;
2516 }
2517 
2518 // If we happen to be doing an i64 load or store into a stack slot that has
2519 // less than a 4-byte alignment, then the frame-index elimination may need to
2520 // use an indexed load or store instruction (because the offset may not be a
2521 // multiple of 4). The extra register needed to hold the offset comes from the
2522 // register scavenger, and it is possible that the scavenger will need to use
2523 // an emergency spill slot. As a result, we need to make sure that a spill slot
2524 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2525 // stack slot.
2526 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2527   // FIXME: This does not handle the LWA case.
2528   if (VT != MVT::i64)
2529     return;
2530 
2531   // NOTE: We'll exclude negative FIs here, which come from argument
2532   // lowering, because there are no known test cases triggering this problem
2533   // using packed structures (or similar). We can remove this exclusion if
2534   // we find such a test case. The reason why this is so test-case driven is
2535   // because this entire 'fixup' is only to prevent crashes (from the
2536   // register scavenger) on not-really-valid inputs. For example, if we have:
2537   //   %a = alloca i1
2538   //   %b = bitcast i1* %a to i64*
2539   //   store i64* a, i64 b
2540   // then the store should really be marked as 'align 1', but is not. If it
2541   // were marked as 'align 1' then the indexed form would have been
2542   // instruction-selected initially, and the problem this 'fixup' is preventing
2543   // won't happen regardless.
2544   if (FrameIdx < 0)
2545     return;
2546 
2547   MachineFunction &MF = DAG.getMachineFunction();
2548   MachineFrameInfo &MFI = MF.getFrameInfo();
2549 
2550   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2551     return;
2552 
2553   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2554   FuncInfo->setHasNonRISpills();
2555 }
2556 
2557 /// Returns true if the address N can be represented by a base register plus
2558 /// a signed 16-bit displacement [r+imm], and if it is not better
2559 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2560 /// displacements that are multiples of that value.
2561 bool PPCTargetLowering::SelectAddressRegImm(
2562     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2563     MaybeAlign EncodingAlignment) const {
2564   // FIXME dl should come from parent load or store, not from address
2565   SDLoc dl(N);
2566 
2567   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2568   // a [pc+imm].
2569   if (SelectAddressPCRel(N, Base))
2570     return false;
2571 
2572   // If this can be more profitably realized as r+r, fail.
2573   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2574     return false;
2575 
2576   if (N.getOpcode() == ISD::ADD) {
2577     int16_t imm = 0;
2578     if (isIntS16Immediate(N.getOperand(1), imm) &&
2579         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2580       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2581       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2582         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2583         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2584       } else {
2585         Base = N.getOperand(0);
2586       }
2587       return true; // [r+i]
2588     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2589       // Match LOAD (ADD (X, Lo(G))).
2590       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2591              && "Cannot handle constant offsets yet!");
2592       Disp = N.getOperand(1).getOperand(0);  // The global address.
2593       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2594              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2595              Disp.getOpcode() == ISD::TargetConstantPool ||
2596              Disp.getOpcode() == ISD::TargetJumpTable);
2597       Base = N.getOperand(0);
2598       return true;  // [&g+r]
2599     }
2600   } else if (N.getOpcode() == ISD::OR) {
2601     int16_t imm = 0;
2602     if (isIntS16Immediate(N.getOperand(1), imm) &&
2603         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2604       // If this is an or of disjoint bitfields, we can codegen this as an add
2605       // (for better address arithmetic) if the LHS and RHS of the OR are
2606       // provably disjoint.
2607       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2608 
2609       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2610         // If all of the bits are known zero on the LHS or RHS, the add won't
2611         // carry.
2612         if (FrameIndexSDNode *FI =
2613               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2614           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2615           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2616         } else {
2617           Base = N.getOperand(0);
2618         }
2619         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2620         return true;
2621       }
2622     }
2623   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2624     // Loading from a constant address.
2625 
2626     // If this address fits entirely in a 16-bit sext immediate field, codegen
2627     // this as "d, 0"
2628     int16_t Imm;
2629     if (isIntS16Immediate(CN, Imm) &&
2630         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2631       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2632       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2633                              CN->getValueType(0));
2634       return true;
2635     }
2636 
2637     // Handle 32-bit sext immediates with LIS + addr mode.
2638     if ((CN->getValueType(0) == MVT::i32 ||
2639          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2640         (!EncodingAlignment ||
2641          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2642       int Addr = (int)CN->getZExtValue();
2643 
2644       // Otherwise, break this down into an LIS + disp.
2645       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2646 
2647       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2648                                    MVT::i32);
2649       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2650       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2651       return true;
2652     }
2653   }
2654 
2655   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2656   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2657     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2658     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2659   } else
2660     Base = N;
2661   return true;      // [r+0]
2662 }
2663 
2664 /// Similar to the 16-bit case but for instructions that take a 34-bit
2665 /// displacement field (prefixed loads/stores).
2666 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2667                                               SDValue &Base,
2668                                               SelectionDAG &DAG) const {
2669   // Only on 64-bit targets.
2670   if (N.getValueType() != MVT::i64)
2671     return false;
2672 
2673   SDLoc dl(N);
2674   int64_t Imm = 0;
2675 
2676   if (N.getOpcode() == ISD::ADD) {
2677     if (!isIntS34Immediate(N.getOperand(1), Imm))
2678       return false;
2679     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2680     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2681       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2682     else
2683       Base = N.getOperand(0);
2684     return true;
2685   }
2686 
2687   if (N.getOpcode() == ISD::OR) {
2688     if (!isIntS34Immediate(N.getOperand(1), Imm))
2689       return false;
2690     // If this is an or of disjoint bitfields, we can codegen this as an add
2691     // (for better address arithmetic) if the LHS and RHS of the OR are
2692     // provably disjoint.
2693     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2694     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2695       return false;
2696     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2697       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2698     else
2699       Base = N.getOperand(0);
2700     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2701     return true;
2702   }
2703 
2704   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2705     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2706     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2707     return true;
2708   }
2709 
2710   return false;
2711 }
2712 
2713 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2714 /// represented as an indexed [r+r] operation.
2715 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2716                                                 SDValue &Index,
2717                                                 SelectionDAG &DAG) const {
2718   // Check to see if we can easily represent this as an [r+r] address.  This
2719   // will fail if it thinks that the address is more profitably represented as
2720   // reg+imm, e.g. where imm = 0.
2721   if (SelectAddressRegReg(N, Base, Index, DAG))
2722     return true;
2723 
2724   // If the address is the result of an add, we will utilize the fact that the
2725   // address calculation includes an implicit add.  However, we can reduce
2726   // register pressure if we do not materialize a constant just for use as the
2727   // index register.  We only get rid of the add if it is not an add of a
2728   // value and a 16-bit signed constant and both have a single use.
2729   int16_t imm = 0;
2730   if (N.getOpcode() == ISD::ADD &&
2731       (!isIntS16Immediate(N.getOperand(1), imm) ||
2732        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2733     Base = N.getOperand(0);
2734     Index = N.getOperand(1);
2735     return true;
2736   }
2737 
2738   // Otherwise, do it the hard way, using R0 as the base register.
2739   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2740                          N.getValueType());
2741   Index = N;
2742   return true;
2743 }
2744 
2745 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2746   Ty *PCRelCand = dyn_cast<Ty>(N);
2747   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2748 }
2749 
2750 /// Returns true if this address is a PC Relative address.
2751 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2752 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2753 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2754   // This is a materialize PC Relative node. Always select this as PC Relative.
2755   Base = N;
2756   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2757     return true;
2758   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2759       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2760       isValidPCRelNode<JumpTableSDNode>(N) ||
2761       isValidPCRelNode<BlockAddressSDNode>(N))
2762     return true;
2763   return false;
2764 }
2765 
2766 /// Returns true if we should use a direct load into vector instruction
2767 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2768 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2769 
2770   // If there are any other uses other than scalar to vector, then we should
2771   // keep it as a scalar load -> direct move pattern to prevent multiple
2772   // loads.
2773   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2774   if (!LD)
2775     return false;
2776 
2777   EVT MemVT = LD->getMemoryVT();
2778   if (!MemVT.isSimple())
2779     return false;
2780   switch(MemVT.getSimpleVT().SimpleTy) {
2781   case MVT::i64:
2782     break;
2783   case MVT::i32:
2784     if (!ST.hasP8Vector())
2785       return false;
2786     break;
2787   case MVT::i16:
2788   case MVT::i8:
2789     if (!ST.hasP9Vector())
2790       return false;
2791     break;
2792   default:
2793     return false;
2794   }
2795 
2796   SDValue LoadedVal(N, 0);
2797   if (!LoadedVal.hasOneUse())
2798     return false;
2799 
2800   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2801        UI != UE; ++UI)
2802     if (UI.getUse().get().getResNo() == 0 &&
2803         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2804         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2805       return false;
2806 
2807   return true;
2808 }
2809 
2810 /// getPreIndexedAddressParts - returns true by value, base pointer and
2811 /// offset pointer and addressing mode by reference if the node's address
2812 /// can be legally represented as pre-indexed load / store address.
2813 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2814                                                   SDValue &Offset,
2815                                                   ISD::MemIndexedMode &AM,
2816                                                   SelectionDAG &DAG) const {
2817   if (DisablePPCPreinc) return false;
2818 
2819   bool isLoad = true;
2820   SDValue Ptr;
2821   EVT VT;
2822   unsigned Alignment;
2823   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2824     Ptr = LD->getBasePtr();
2825     VT = LD->getMemoryVT();
2826     Alignment = LD->getAlignment();
2827   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2828     Ptr = ST->getBasePtr();
2829     VT  = ST->getMemoryVT();
2830     Alignment = ST->getAlignment();
2831     isLoad = false;
2832   } else
2833     return false;
2834 
2835   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2836   // instructions because we can fold these into a more efficient instruction
2837   // instead, (such as LXSD).
2838   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2839     return false;
2840   }
2841 
2842   // PowerPC doesn't have preinc load/store instructions for vectors
2843   if (VT.isVector())
2844     return false;
2845 
2846   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2847     // Common code will reject creating a pre-inc form if the base pointer
2848     // is a frame index, or if N is a store and the base pointer is either
2849     // the same as or a predecessor of the value being stored.  Check for
2850     // those situations here, and try with swapped Base/Offset instead.
2851     bool Swap = false;
2852 
2853     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2854       Swap = true;
2855     else if (!isLoad) {
2856       SDValue Val = cast<StoreSDNode>(N)->getValue();
2857       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2858         Swap = true;
2859     }
2860 
2861     if (Swap)
2862       std::swap(Base, Offset);
2863 
2864     AM = ISD::PRE_INC;
2865     return true;
2866   }
2867 
2868   // LDU/STU can only handle immediates that are a multiple of 4.
2869   if (VT != MVT::i64) {
2870     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2871       return false;
2872   } else {
2873     // LDU/STU need an address with at least 4-byte alignment.
2874     if (Alignment < 4)
2875       return false;
2876 
2877     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2878       return false;
2879   }
2880 
2881   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2882     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2883     // sext i32 to i64 when addr mode is r+i.
2884     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2885         LD->getExtensionType() == ISD::SEXTLOAD &&
2886         isa<ConstantSDNode>(Offset))
2887       return false;
2888   }
2889 
2890   AM = ISD::PRE_INC;
2891   return true;
2892 }
2893 
2894 //===----------------------------------------------------------------------===//
2895 //  LowerOperation implementation
2896 //===----------------------------------------------------------------------===//
2897 
2898 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2899 /// and LoOpFlags to the target MO flags.
2900 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2901                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2902                                const GlobalValue *GV = nullptr) {
2903   HiOpFlags = PPCII::MO_HA;
2904   LoOpFlags = PPCII::MO_LO;
2905 
2906   // Don't use the pic base if not in PIC relocation model.
2907   if (IsPIC) {
2908     HiOpFlags |= PPCII::MO_PIC_FLAG;
2909     LoOpFlags |= PPCII::MO_PIC_FLAG;
2910   }
2911 }
2912 
2913 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2914                              SelectionDAG &DAG) {
2915   SDLoc DL(HiPart);
2916   EVT PtrVT = HiPart.getValueType();
2917   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2918 
2919   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2920   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2921 
2922   // With PIC, the first instruction is actually "GR+hi(&G)".
2923   if (isPIC)
2924     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2925                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2926 
2927   // Generate non-pic code that has direct accesses to the constant pool.
2928   // The address of the global is just (hi(&g)+lo(&g)).
2929   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2930 }
2931 
2932 static void setUsesTOCBasePtr(MachineFunction &MF) {
2933   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2934   FuncInfo->setUsesTOCBasePtr();
2935 }
2936 
2937 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2938   setUsesTOCBasePtr(DAG.getMachineFunction());
2939 }
2940 
2941 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2942                                        SDValue GA) const {
2943   const bool Is64Bit = Subtarget.isPPC64();
2944   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2945   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2946                         : Subtarget.isAIXABI()
2947                               ? DAG.getRegister(PPC::R2, VT)
2948                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2949   SDValue Ops[] = { GA, Reg };
2950   return DAG.getMemIntrinsicNode(
2951       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2952       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2953       MachineMemOperand::MOLoad);
2954 }
2955 
2956 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2957                                              SelectionDAG &DAG) const {
2958   EVT PtrVT = Op.getValueType();
2959   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2960   const Constant *C = CP->getConstVal();
2961 
2962   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2963   // The actual address of the GlobalValue is stored in the TOC.
2964   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2965     if (Subtarget.isUsingPCRelativeCalls()) {
2966       SDLoc DL(CP);
2967       EVT Ty = getPointerTy(DAG.getDataLayout());
2968       SDValue ConstPool = DAG.getTargetConstantPool(
2969           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2970       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2971     }
2972     setUsesTOCBasePtr(DAG);
2973     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2974     return getTOCEntry(DAG, SDLoc(CP), GA);
2975   }
2976 
2977   unsigned MOHiFlag, MOLoFlag;
2978   bool IsPIC = isPositionIndependent();
2979   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2980 
2981   if (IsPIC && Subtarget.isSVR4ABI()) {
2982     SDValue GA =
2983         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2984     return getTOCEntry(DAG, SDLoc(CP), GA);
2985   }
2986 
2987   SDValue CPIHi =
2988       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2989   SDValue CPILo =
2990       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2991   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2992 }
2993 
2994 // For 64-bit PowerPC, prefer the more compact relative encodings.
2995 // This trades 32 bits per jump table entry for one or two instructions
2996 // on the jump site.
2997 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2998   if (isJumpTableRelative())
2999     return MachineJumpTableInfo::EK_LabelDifference32;
3000 
3001   return TargetLowering::getJumpTableEncoding();
3002 }
3003 
3004 bool PPCTargetLowering::isJumpTableRelative() const {
3005   if (UseAbsoluteJumpTables)
3006     return false;
3007   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3008     return true;
3009   return TargetLowering::isJumpTableRelative();
3010 }
3011 
3012 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3013                                                     SelectionDAG &DAG) const {
3014   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3015     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3016 
3017   switch (getTargetMachine().getCodeModel()) {
3018   case CodeModel::Small:
3019   case CodeModel::Medium:
3020     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3021   default:
3022     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3023                        getPointerTy(DAG.getDataLayout()));
3024   }
3025 }
3026 
3027 const MCExpr *
3028 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3029                                                 unsigned JTI,
3030                                                 MCContext &Ctx) const {
3031   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3032     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3033 
3034   switch (getTargetMachine().getCodeModel()) {
3035   case CodeModel::Small:
3036   case CodeModel::Medium:
3037     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3038   default:
3039     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3040   }
3041 }
3042 
3043 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3044   EVT PtrVT = Op.getValueType();
3045   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3046 
3047   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3048   if (Subtarget.isUsingPCRelativeCalls()) {
3049     SDLoc DL(JT);
3050     EVT Ty = getPointerTy(DAG.getDataLayout());
3051     SDValue GA =
3052         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3053     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3054     return MatAddr;
3055   }
3056 
3057   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3058   // The actual address of the GlobalValue is stored in the TOC.
3059   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3060     setUsesTOCBasePtr(DAG);
3061     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3062     return getTOCEntry(DAG, SDLoc(JT), GA);
3063   }
3064 
3065   unsigned MOHiFlag, MOLoFlag;
3066   bool IsPIC = isPositionIndependent();
3067   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3068 
3069   if (IsPIC && Subtarget.isSVR4ABI()) {
3070     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3071                                         PPCII::MO_PIC_FLAG);
3072     return getTOCEntry(DAG, SDLoc(GA), GA);
3073   }
3074 
3075   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3076   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3077   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3078 }
3079 
3080 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3081                                              SelectionDAG &DAG) const {
3082   EVT PtrVT = Op.getValueType();
3083   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3084   const BlockAddress *BA = BASDN->getBlockAddress();
3085 
3086   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3087   if (Subtarget.isUsingPCRelativeCalls()) {
3088     SDLoc DL(BASDN);
3089     EVT Ty = getPointerTy(DAG.getDataLayout());
3090     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3091                                            PPCII::MO_PCREL_FLAG);
3092     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3093     return MatAddr;
3094   }
3095 
3096   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3097   // The actual BlockAddress is stored in the TOC.
3098   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3099     setUsesTOCBasePtr(DAG);
3100     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3101     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3102   }
3103 
3104   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3105   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3106     return getTOCEntry(
3107         DAG, SDLoc(BASDN),
3108         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3109 
3110   unsigned MOHiFlag, MOLoFlag;
3111   bool IsPIC = isPositionIndependent();
3112   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3113   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3114   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3115   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3116 }
3117 
3118 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3119                                               SelectionDAG &DAG) const {
3120   // FIXME: TLS addresses currently use medium model code sequences,
3121   // which is the most useful form.  Eventually support for small and
3122   // large models could be added if users need it, at the cost of
3123   // additional complexity.
3124   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3125   if (DAG.getTarget().useEmulatedTLS())
3126     return LowerToTLSEmulatedModel(GA, DAG);
3127 
3128   SDLoc dl(GA);
3129   const GlobalValue *GV = GA->getGlobal();
3130   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3131   bool is64bit = Subtarget.isPPC64();
3132   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3133   PICLevel::Level picLevel = M->getPICLevel();
3134 
3135   const TargetMachine &TM = getTargetMachine();
3136   TLSModel::Model Model = TM.getTLSModel(GV);
3137 
3138   if (Model == TLSModel::LocalExec) {
3139     if (Subtarget.isUsingPCRelativeCalls()) {
3140       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3141       SDValue TGA = DAG.getTargetGlobalAddress(
3142           GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3143       SDValue MatAddr =
3144           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3145       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3146     }
3147 
3148     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3149                                                PPCII::MO_TPREL_HA);
3150     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3151                                                PPCII::MO_TPREL_LO);
3152     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3153                              : DAG.getRegister(PPC::R2, MVT::i32);
3154 
3155     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3156     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3157   }
3158 
3159   if (Model == TLSModel::InitialExec) {
3160     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3161     SDValue TGA = DAG.getTargetGlobalAddress(
3162         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3163     SDValue TGATLS = DAG.getTargetGlobalAddress(
3164         GV, dl, PtrVT, 0,
3165         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3166     SDValue TPOffset;
3167     if (IsPCRel) {
3168       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3169       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3170                              MachinePointerInfo());
3171     } else {
3172       SDValue GOTPtr;
3173       if (is64bit) {
3174         setUsesTOCBasePtr(DAG);
3175         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3176         GOTPtr =
3177             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3178       } else {
3179         if (!TM.isPositionIndependent())
3180           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3181         else if (picLevel == PICLevel::SmallPIC)
3182           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3183         else
3184           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3185       }
3186       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3187     }
3188     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3189   }
3190 
3191   if (Model == TLSModel::GeneralDynamic) {
3192     if (Subtarget.isUsingPCRelativeCalls()) {
3193       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3194                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3195       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3196     }
3197 
3198     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3199     SDValue GOTPtr;
3200     if (is64bit) {
3201       setUsesTOCBasePtr(DAG);
3202       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3203       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3204                                    GOTReg, TGA);
3205     } else {
3206       if (picLevel == PICLevel::SmallPIC)
3207         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3208       else
3209         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3210     }
3211     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3212                        GOTPtr, TGA, TGA);
3213   }
3214 
3215   if (Model == TLSModel::LocalDynamic) {
3216     if (Subtarget.isUsingPCRelativeCalls()) {
3217       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3218                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3219       SDValue MatPCRel =
3220           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3221       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3222     }
3223 
3224     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3225     SDValue GOTPtr;
3226     if (is64bit) {
3227       setUsesTOCBasePtr(DAG);
3228       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3229       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3230                            GOTReg, TGA);
3231     } else {
3232       if (picLevel == PICLevel::SmallPIC)
3233         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3234       else
3235         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3236     }
3237     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3238                                   PtrVT, GOTPtr, TGA, TGA);
3239     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3240                                       PtrVT, TLSAddr, TGA);
3241     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3242   }
3243 
3244   llvm_unreachable("Unknown TLS model!");
3245 }
3246 
3247 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3248                                               SelectionDAG &DAG) const {
3249   EVT PtrVT = Op.getValueType();
3250   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3251   SDLoc DL(GSDN);
3252   const GlobalValue *GV = GSDN->getGlobal();
3253 
3254   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3255   // The actual address of the GlobalValue is stored in the TOC.
3256   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3257     if (Subtarget.isUsingPCRelativeCalls()) {
3258       EVT Ty = getPointerTy(DAG.getDataLayout());
3259       if (isAccessedAsGotIndirect(Op)) {
3260         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3261                                                 PPCII::MO_PCREL_FLAG |
3262                                                     PPCII::MO_GOT_FLAG);
3263         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3264         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3265                                    MachinePointerInfo());
3266         return Load;
3267       } else {
3268         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3269                                                 PPCII::MO_PCREL_FLAG);
3270         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3271       }
3272     }
3273     setUsesTOCBasePtr(DAG);
3274     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3275     return getTOCEntry(DAG, DL, GA);
3276   }
3277 
3278   unsigned MOHiFlag, MOLoFlag;
3279   bool IsPIC = isPositionIndependent();
3280   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3281 
3282   if (IsPIC && Subtarget.isSVR4ABI()) {
3283     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3284                                             GSDN->getOffset(),
3285                                             PPCII::MO_PIC_FLAG);
3286     return getTOCEntry(DAG, DL, GA);
3287   }
3288 
3289   SDValue GAHi =
3290     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3291   SDValue GALo =
3292     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3293 
3294   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3295 }
3296 
3297 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3298   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3299   SDLoc dl(Op);
3300 
3301   if (Op.getValueType() == MVT::v2i64) {
3302     // When the operands themselves are v2i64 values, we need to do something
3303     // special because VSX has no underlying comparison operations for these.
3304     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3305       // Equality can be handled by casting to the legal type for Altivec
3306       // comparisons, everything else needs to be expanded.
3307       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3308         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3309                  DAG.getSetCC(dl, MVT::v4i32,
3310                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3311                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3312                    CC));
3313       }
3314 
3315       return SDValue();
3316     }
3317 
3318     // We handle most of these in the usual way.
3319     return Op;
3320   }
3321 
3322   // If we're comparing for equality to zero, expose the fact that this is
3323   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3324   // fold the new nodes.
3325   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3326     return V;
3327 
3328   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3329     // Leave comparisons against 0 and -1 alone for now, since they're usually
3330     // optimized.  FIXME: revisit this when we can custom lower all setcc
3331     // optimizations.
3332     if (C->isAllOnesValue() || C->isNullValue())
3333       return SDValue();
3334   }
3335 
3336   // If we have an integer seteq/setne, turn it into a compare against zero
3337   // by xor'ing the rhs with the lhs, which is faster than setting a
3338   // condition register, reading it back out, and masking the correct bit.  The
3339   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3340   // the result to other bit-twiddling opportunities.
3341   EVT LHSVT = Op.getOperand(0).getValueType();
3342   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3343     EVT VT = Op.getValueType();
3344     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3345                                 Op.getOperand(1));
3346     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3347   }
3348   return SDValue();
3349 }
3350 
3351 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3352   SDNode *Node = Op.getNode();
3353   EVT VT = Node->getValueType(0);
3354   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3355   SDValue InChain = Node->getOperand(0);
3356   SDValue VAListPtr = Node->getOperand(1);
3357   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3358   SDLoc dl(Node);
3359 
3360   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3361 
3362   // gpr_index
3363   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3364                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3365   InChain = GprIndex.getValue(1);
3366 
3367   if (VT == MVT::i64) {
3368     // Check if GprIndex is even
3369     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3370                                  DAG.getConstant(1, dl, MVT::i32));
3371     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3372                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3373     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3374                                           DAG.getConstant(1, dl, MVT::i32));
3375     // Align GprIndex to be even if it isn't
3376     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3377                            GprIndex);
3378   }
3379 
3380   // fpr index is 1 byte after gpr
3381   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3382                                DAG.getConstant(1, dl, MVT::i32));
3383 
3384   // fpr
3385   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3386                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3387   InChain = FprIndex.getValue(1);
3388 
3389   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3390                                        DAG.getConstant(8, dl, MVT::i32));
3391 
3392   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3393                                         DAG.getConstant(4, dl, MVT::i32));
3394 
3395   // areas
3396   SDValue OverflowArea =
3397       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3398   InChain = OverflowArea.getValue(1);
3399 
3400   SDValue RegSaveArea =
3401       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3402   InChain = RegSaveArea.getValue(1);
3403 
3404   // select overflow_area if index > 8
3405   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3406                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3407 
3408   // adjustment constant gpr_index * 4/8
3409   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3410                                     VT.isInteger() ? GprIndex : FprIndex,
3411                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3412                                                     MVT::i32));
3413 
3414   // OurReg = RegSaveArea + RegConstant
3415   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3416                                RegConstant);
3417 
3418   // Floating types are 32 bytes into RegSaveArea
3419   if (VT.isFloatingPoint())
3420     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3421                          DAG.getConstant(32, dl, MVT::i32));
3422 
3423   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3424   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3425                                    VT.isInteger() ? GprIndex : FprIndex,
3426                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3427                                                    MVT::i32));
3428 
3429   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3430                               VT.isInteger() ? VAListPtr : FprPtr,
3431                               MachinePointerInfo(SV), MVT::i8);
3432 
3433   // determine if we should load from reg_save_area or overflow_area
3434   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3435 
3436   // increase overflow_area by 4/8 if gpr/fpr > 8
3437   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3438                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3439                                           dl, MVT::i32));
3440 
3441   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3442                              OverflowAreaPlusN);
3443 
3444   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3445                               MachinePointerInfo(), MVT::i32);
3446 
3447   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3448 }
3449 
3450 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3451   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3452 
3453   // We have to copy the entire va_list struct:
3454   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3455   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3456                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3457                        false, true, false, MachinePointerInfo(),
3458                        MachinePointerInfo());
3459 }
3460 
3461 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3462                                                   SelectionDAG &DAG) const {
3463   if (Subtarget.isAIXABI())
3464     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3465 
3466   return Op.getOperand(0);
3467 }
3468 
3469 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3470                                                 SelectionDAG &DAG) const {
3471   if (Subtarget.isAIXABI())
3472     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3473 
3474   SDValue Chain = Op.getOperand(0);
3475   SDValue Trmp = Op.getOperand(1); // trampoline
3476   SDValue FPtr = Op.getOperand(2); // nested function
3477   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3478   SDLoc dl(Op);
3479 
3480   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3481   bool isPPC64 = (PtrVT == MVT::i64);
3482   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3483 
3484   TargetLowering::ArgListTy Args;
3485   TargetLowering::ArgListEntry Entry;
3486 
3487   Entry.Ty = IntPtrTy;
3488   Entry.Node = Trmp; Args.push_back(Entry);
3489 
3490   // TrampSize == (isPPC64 ? 48 : 40);
3491   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3492                                isPPC64 ? MVT::i64 : MVT::i32);
3493   Args.push_back(Entry);
3494 
3495   Entry.Node = FPtr; Args.push_back(Entry);
3496   Entry.Node = Nest; Args.push_back(Entry);
3497 
3498   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3499   TargetLowering::CallLoweringInfo CLI(DAG);
3500   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3501       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3502       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3503 
3504   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3505   return CallResult.second;
3506 }
3507 
3508 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3509   MachineFunction &MF = DAG.getMachineFunction();
3510   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3511   EVT PtrVT = getPointerTy(MF.getDataLayout());
3512 
3513   SDLoc dl(Op);
3514 
3515   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3516     // vastart just stores the address of the VarArgsFrameIndex slot into the
3517     // memory location argument.
3518     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3519     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3520     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3521                         MachinePointerInfo(SV));
3522   }
3523 
3524   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3525   // We suppose the given va_list is already allocated.
3526   //
3527   // typedef struct {
3528   //  char gpr;     /* index into the array of 8 GPRs
3529   //                 * stored in the register save area
3530   //                 * gpr=0 corresponds to r3,
3531   //                 * gpr=1 to r4, etc.
3532   //                 */
3533   //  char fpr;     /* index into the array of 8 FPRs
3534   //                 * stored in the register save area
3535   //                 * fpr=0 corresponds to f1,
3536   //                 * fpr=1 to f2, etc.
3537   //                 */
3538   //  char *overflow_arg_area;
3539   //                /* location on stack that holds
3540   //                 * the next overflow argument
3541   //                 */
3542   //  char *reg_save_area;
3543   //               /* where r3:r10 and f1:f8 (if saved)
3544   //                * are stored
3545   //                */
3546   // } va_list[1];
3547 
3548   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3549   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3550   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3551                                             PtrVT);
3552   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3553                                  PtrVT);
3554 
3555   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3556   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3557 
3558   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3559   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3560 
3561   uint64_t FPROffset = 1;
3562   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3563 
3564   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3565 
3566   // Store first byte : number of int regs
3567   SDValue firstStore =
3568       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3569                         MachinePointerInfo(SV), MVT::i8);
3570   uint64_t nextOffset = FPROffset;
3571   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3572                                   ConstFPROffset);
3573 
3574   // Store second byte : number of float regs
3575   SDValue secondStore =
3576       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3577                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3578   nextOffset += StackOffset;
3579   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3580 
3581   // Store second word : arguments given on stack
3582   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3583                                     MachinePointerInfo(SV, nextOffset));
3584   nextOffset += FrameOffset;
3585   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3586 
3587   // Store third word : arguments given in registers
3588   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3589                       MachinePointerInfo(SV, nextOffset));
3590 }
3591 
3592 /// FPR - The set of FP registers that should be allocated for arguments
3593 /// on Darwin and AIX.
3594 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3595                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3596                                 PPC::F11, PPC::F12, PPC::F13};
3597 
3598 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3599 /// the stack.
3600 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3601                                        unsigned PtrByteSize) {
3602   unsigned ArgSize = ArgVT.getStoreSize();
3603   if (Flags.isByVal())
3604     ArgSize = Flags.getByValSize();
3605 
3606   // Round up to multiples of the pointer size, except for array members,
3607   // which are always packed.
3608   if (!Flags.isInConsecutiveRegs())
3609     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3610 
3611   return ArgSize;
3612 }
3613 
3614 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3615 /// on the stack.
3616 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3617                                          ISD::ArgFlagsTy Flags,
3618                                          unsigned PtrByteSize) {
3619   Align Alignment(PtrByteSize);
3620 
3621   // Altivec parameters are padded to a 16 byte boundary.
3622   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3623       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3624       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3625       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3626     Alignment = Align(16);
3627 
3628   // ByVal parameters are aligned as requested.
3629   if (Flags.isByVal()) {
3630     auto BVAlign = Flags.getNonZeroByValAlign();
3631     if (BVAlign > PtrByteSize) {
3632       if (BVAlign.value() % PtrByteSize != 0)
3633         llvm_unreachable(
3634             "ByVal alignment is not a multiple of the pointer size");
3635 
3636       Alignment = BVAlign;
3637     }
3638   }
3639 
3640   // Array members are always packed to their original alignment.
3641   if (Flags.isInConsecutiveRegs()) {
3642     // If the array member was split into multiple registers, the first
3643     // needs to be aligned to the size of the full type.  (Except for
3644     // ppcf128, which is only aligned as its f64 components.)
3645     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3646       Alignment = Align(OrigVT.getStoreSize());
3647     else
3648       Alignment = Align(ArgVT.getStoreSize());
3649   }
3650 
3651   return Alignment;
3652 }
3653 
3654 /// CalculateStackSlotUsed - Return whether this argument will use its
3655 /// stack slot (instead of being passed in registers).  ArgOffset,
3656 /// AvailableFPRs, and AvailableVRs must hold the current argument
3657 /// position, and will be updated to account for this argument.
3658 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3659                                    unsigned PtrByteSize, unsigned LinkageSize,
3660                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3661                                    unsigned &AvailableFPRs,
3662                                    unsigned &AvailableVRs) {
3663   bool UseMemory = false;
3664 
3665   // Respect alignment of argument on the stack.
3666   Align Alignment =
3667       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3668   ArgOffset = alignTo(ArgOffset, Alignment);
3669   // If there's no space left in the argument save area, we must
3670   // use memory (this check also catches zero-sized arguments).
3671   if (ArgOffset >= LinkageSize + ParamAreaSize)
3672     UseMemory = true;
3673 
3674   // Allocate argument on the stack.
3675   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3676   if (Flags.isInConsecutiveRegsLast())
3677     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3678   // If we overran the argument save area, we must use memory
3679   // (this check catches arguments passed partially in memory)
3680   if (ArgOffset > LinkageSize + ParamAreaSize)
3681     UseMemory = true;
3682 
3683   // However, if the argument is actually passed in an FPR or a VR,
3684   // we don't use memory after all.
3685   if (!Flags.isByVal()) {
3686     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3687       if (AvailableFPRs > 0) {
3688         --AvailableFPRs;
3689         return false;
3690       }
3691     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3692         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3693         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3694         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3695       if (AvailableVRs > 0) {
3696         --AvailableVRs;
3697         return false;
3698       }
3699   }
3700 
3701   return UseMemory;
3702 }
3703 
3704 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3705 /// ensure minimum alignment required for target.
3706 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3707                                      unsigned NumBytes) {
3708   return alignTo(NumBytes, Lowering->getStackAlign());
3709 }
3710 
3711 SDValue PPCTargetLowering::LowerFormalArguments(
3712     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3713     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3714     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3715   if (Subtarget.isAIXABI())
3716     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3717                                     InVals);
3718   if (Subtarget.is64BitELFABI())
3719     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3720                                        InVals);
3721   assert(Subtarget.is32BitELFABI());
3722   return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3723                                      InVals);
3724 }
3725 
3726 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3727     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3728     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3729     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3730 
3731   // 32-bit SVR4 ABI Stack Frame Layout:
3732   //              +-----------------------------------+
3733   //        +-->  |            Back chain             |
3734   //        |     +-----------------------------------+
3735   //        |     | Floating-point register save area |
3736   //        |     +-----------------------------------+
3737   //        |     |    General register save area     |
3738   //        |     +-----------------------------------+
3739   //        |     |          CR save word             |
3740   //        |     +-----------------------------------+
3741   //        |     |         VRSAVE save word          |
3742   //        |     +-----------------------------------+
3743   //        |     |         Alignment padding         |
3744   //        |     +-----------------------------------+
3745   //        |     |     Vector register save area     |
3746   //        |     +-----------------------------------+
3747   //        |     |       Local variable space        |
3748   //        |     +-----------------------------------+
3749   //        |     |        Parameter list area        |
3750   //        |     +-----------------------------------+
3751   //        |     |           LR save word            |
3752   //        |     +-----------------------------------+
3753   // SP-->  +---  |            Back chain             |
3754   //              +-----------------------------------+
3755   //
3756   // Specifications:
3757   //   System V Application Binary Interface PowerPC Processor Supplement
3758   //   AltiVec Technology Programming Interface Manual
3759 
3760   MachineFunction &MF = DAG.getMachineFunction();
3761   MachineFrameInfo &MFI = MF.getFrameInfo();
3762   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3763 
3764   EVT PtrVT = getPointerTy(MF.getDataLayout());
3765   // Potential tail calls could cause overwriting of argument stack slots.
3766   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3767                        (CallConv == CallingConv::Fast));
3768   const Align PtrAlign(4);
3769 
3770   // Assign locations to all of the incoming arguments.
3771   SmallVector<CCValAssign, 16> ArgLocs;
3772   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3773                  *DAG.getContext());
3774 
3775   // Reserve space for the linkage area on the stack.
3776   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3777   CCInfo.AllocateStack(LinkageSize, PtrAlign);
3778   if (useSoftFloat())
3779     CCInfo.PreAnalyzeFormalArguments(Ins);
3780 
3781   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3782   CCInfo.clearWasPPCF128();
3783 
3784   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3785     CCValAssign &VA = ArgLocs[i];
3786 
3787     // Arguments stored in registers.
3788     if (VA.isRegLoc()) {
3789       const TargetRegisterClass *RC;
3790       EVT ValVT = VA.getValVT();
3791 
3792       switch (ValVT.getSimpleVT().SimpleTy) {
3793         default:
3794           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3795         case MVT::i1:
3796         case MVT::i32:
3797           RC = &PPC::GPRCRegClass;
3798           break;
3799         case MVT::f32:
3800           if (Subtarget.hasP8Vector())
3801             RC = &PPC::VSSRCRegClass;
3802           else if (Subtarget.hasSPE())
3803             RC = &PPC::GPRCRegClass;
3804           else
3805             RC = &PPC::F4RCRegClass;
3806           break;
3807         case MVT::f64:
3808           if (Subtarget.hasVSX())
3809             RC = &PPC::VSFRCRegClass;
3810           else if (Subtarget.hasSPE())
3811             // SPE passes doubles in GPR pairs.
3812             RC = &PPC::GPRCRegClass;
3813           else
3814             RC = &PPC::F8RCRegClass;
3815           break;
3816         case MVT::v16i8:
3817         case MVT::v8i16:
3818         case MVT::v4i32:
3819           RC = &PPC::VRRCRegClass;
3820           break;
3821         case MVT::v4f32:
3822           RC = &PPC::VRRCRegClass;
3823           break;
3824         case MVT::v2f64:
3825         case MVT::v2i64:
3826           RC = &PPC::VRRCRegClass;
3827           break;
3828       }
3829 
3830       SDValue ArgValue;
3831       // Transform the arguments stored in physical registers into
3832       // virtual ones.
3833       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3834         assert(i + 1 < e && "No second half of double precision argument");
3835         unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3836         unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3837         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3838         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3839         if (!Subtarget.isLittleEndian())
3840           std::swap (ArgValueLo, ArgValueHi);
3841         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3842                                ArgValueHi);
3843       } else {
3844         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3845         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3846                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
3847         if (ValVT == MVT::i1)
3848           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3849       }
3850 
3851       InVals.push_back(ArgValue);
3852     } else {
3853       // Argument stored in memory.
3854       assert(VA.isMemLoc());
3855 
3856       // Get the extended size of the argument type in stack
3857       unsigned ArgSize = VA.getLocVT().getStoreSize();
3858       // Get the actual size of the argument type
3859       unsigned ObjSize = VA.getValVT().getStoreSize();
3860       unsigned ArgOffset = VA.getLocMemOffset();
3861       // Stack objects in PPC32 are right justified.
3862       ArgOffset += ArgSize - ObjSize;
3863       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3864 
3865       // Create load nodes to retrieve arguments from the stack.
3866       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3867       InVals.push_back(
3868           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3869     }
3870   }
3871 
3872   // Assign locations to all of the incoming aggregate by value arguments.
3873   // Aggregates passed by value are stored in the local variable space of the
3874   // caller's stack frame, right above the parameter list area.
3875   SmallVector<CCValAssign, 16> ByValArgLocs;
3876   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3877                       ByValArgLocs, *DAG.getContext());
3878 
3879   // Reserve stack space for the allocations in CCInfo.
3880   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3881 
3882   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3883 
3884   // Area that is at least reserved in the caller of this function.
3885   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3886   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3887 
3888   // Set the size that is at least reserved in caller of this function.  Tail
3889   // call optimized function's reserved stack space needs to be aligned so that
3890   // taking the difference between two stack areas will result in an aligned
3891   // stack.
3892   MinReservedArea =
3893       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3894   FuncInfo->setMinReservedArea(MinReservedArea);
3895 
3896   SmallVector<SDValue, 8> MemOps;
3897 
3898   // If the function takes variable number of arguments, make a frame index for
3899   // the start of the first vararg value... for expansion of llvm.va_start.
3900   if (isVarArg) {
3901     static const MCPhysReg GPArgRegs[] = {
3902       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3903       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3904     };
3905     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3906 
3907     static const MCPhysReg FPArgRegs[] = {
3908       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3909       PPC::F8
3910     };
3911     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3912 
3913     if (useSoftFloat() || hasSPE())
3914        NumFPArgRegs = 0;
3915 
3916     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3917     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3918 
3919     // Make room for NumGPArgRegs and NumFPArgRegs.
3920     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3921                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3922 
3923     FuncInfo->setVarArgsStackOffset(
3924       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3925                             CCInfo.getNextStackOffset(), true));
3926 
3927     FuncInfo->setVarArgsFrameIndex(
3928         MFI.CreateStackObject(Depth, Align(8), false));
3929     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3930 
3931     // The fixed integer arguments of a variadic function are stored to the
3932     // VarArgsFrameIndex on the stack so that they may be loaded by
3933     // dereferencing the result of va_next.
3934     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3935       // Get an existing live-in vreg, or add a new one.
3936       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3937       if (!VReg)
3938         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3939 
3940       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3941       SDValue Store =
3942           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3943       MemOps.push_back(Store);
3944       // Increment the address by four for the next argument to store
3945       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3946       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3947     }
3948 
3949     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3950     // is set.
3951     // The double arguments are stored to the VarArgsFrameIndex
3952     // on the stack.
3953     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3954       // Get an existing live-in vreg, or add a new one.
3955       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3956       if (!VReg)
3957         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3958 
3959       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3960       SDValue Store =
3961           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3962       MemOps.push_back(Store);
3963       // Increment the address by eight for the next argument to store
3964       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3965                                          PtrVT);
3966       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3967     }
3968   }
3969 
3970   if (!MemOps.empty())
3971     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3972 
3973   return Chain;
3974 }
3975 
3976 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3977 // value to MVT::i64 and then truncate to the correct register size.
3978 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3979                                              EVT ObjectVT, SelectionDAG &DAG,
3980                                              SDValue ArgVal,
3981                                              const SDLoc &dl) const {
3982   if (Flags.isSExt())
3983     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3984                          DAG.getValueType(ObjectVT));
3985   else if (Flags.isZExt())
3986     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3987                          DAG.getValueType(ObjectVT));
3988 
3989   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3990 }
3991 
3992 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3993     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3994     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3995     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3996   // TODO: add description of PPC stack frame format, or at least some docs.
3997   //
3998   bool isELFv2ABI = Subtarget.isELFv2ABI();
3999   bool isLittleEndian = Subtarget.isLittleEndian();
4000   MachineFunction &MF = DAG.getMachineFunction();
4001   MachineFrameInfo &MFI = MF.getFrameInfo();
4002   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4003 
4004   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4005          "fastcc not supported on varargs functions");
4006 
4007   EVT PtrVT = getPointerTy(MF.getDataLayout());
4008   // Potential tail calls could cause overwriting of argument stack slots.
4009   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4010                        (CallConv == CallingConv::Fast));
4011   unsigned PtrByteSize = 8;
4012   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4013 
4014   static const MCPhysReg GPR[] = {
4015     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4016     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4017   };
4018   static const MCPhysReg VR[] = {
4019     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4020     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4021   };
4022 
4023   const unsigned Num_GPR_Regs = array_lengthof(GPR);
4024   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4025   const unsigned Num_VR_Regs  = array_lengthof(VR);
4026 
4027   // Do a first pass over the arguments to determine whether the ABI
4028   // guarantees that our caller has allocated the parameter save area
4029   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4030   // in the ELFv2 ABI, it is true if this is a vararg function or if
4031   // any parameter is located in a stack slot.
4032 
4033   bool HasParameterArea = !isELFv2ABI || isVarArg;
4034   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4035   unsigned NumBytes = LinkageSize;
4036   unsigned AvailableFPRs = Num_FPR_Regs;
4037   unsigned AvailableVRs = Num_VR_Regs;
4038   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4039     if (Ins[i].Flags.isNest())
4040       continue;
4041 
4042     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4043                                PtrByteSize, LinkageSize, ParamAreaSize,
4044                                NumBytes, AvailableFPRs, AvailableVRs))
4045       HasParameterArea = true;
4046   }
4047 
4048   // Add DAG nodes to load the arguments or copy them out of registers.  On
4049   // entry to a function on PPC, the arguments start after the linkage area,
4050   // although the first ones are often in registers.
4051 
4052   unsigned ArgOffset = LinkageSize;
4053   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4054   SmallVector<SDValue, 8> MemOps;
4055   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4056   unsigned CurArgIdx = 0;
4057   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4058     SDValue ArgVal;
4059     bool needsLoad = false;
4060     EVT ObjectVT = Ins[ArgNo].VT;
4061     EVT OrigVT = Ins[ArgNo].ArgVT;
4062     unsigned ObjSize = ObjectVT.getStoreSize();
4063     unsigned ArgSize = ObjSize;
4064     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4065     if (Ins[ArgNo].isOrigArg()) {
4066       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4067       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4068     }
4069     // We re-align the argument offset for each argument, except when using the
4070     // fast calling convention, when we need to make sure we do that only when
4071     // we'll actually use a stack slot.
4072     unsigned CurArgOffset;
4073     Align Alignment;
4074     auto ComputeArgOffset = [&]() {
4075       /* Respect alignment of argument on the stack.  */
4076       Alignment =
4077           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4078       ArgOffset = alignTo(ArgOffset, Alignment);
4079       CurArgOffset = ArgOffset;
4080     };
4081 
4082     if (CallConv != CallingConv::Fast) {
4083       ComputeArgOffset();
4084 
4085       /* Compute GPR index associated with argument offset.  */
4086       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4087       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4088     }
4089 
4090     // FIXME the codegen can be much improved in some cases.
4091     // We do not have to keep everything in memory.
4092     if (Flags.isByVal()) {
4093       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4094 
4095       if (CallConv == CallingConv::Fast)
4096         ComputeArgOffset();
4097 
4098       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4099       ObjSize = Flags.getByValSize();
4100       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4101       // Empty aggregate parameters do not take up registers.  Examples:
4102       //   struct { } a;
4103       //   union  { } b;
4104       //   int c[0];
4105       // etc.  However, we have to provide a place-holder in InVals, so
4106       // pretend we have an 8-byte item at the current address for that
4107       // purpose.
4108       if (!ObjSize) {
4109         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4110         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4111         InVals.push_back(FIN);
4112         continue;
4113       }
4114 
4115       // Create a stack object covering all stack doublewords occupied
4116       // by the argument.  If the argument is (fully or partially) on
4117       // the stack, or if the argument is fully in registers but the
4118       // caller has allocated the parameter save anyway, we can refer
4119       // directly to the caller's stack frame.  Otherwise, create a
4120       // local copy in our own frame.
4121       int FI;
4122       if (HasParameterArea ||
4123           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4124         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4125       else
4126         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4127       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4128 
4129       // Handle aggregates smaller than 8 bytes.
4130       if (ObjSize < PtrByteSize) {
4131         // The value of the object is its address, which differs from the
4132         // address of the enclosing doubleword on big-endian systems.
4133         SDValue Arg = FIN;
4134         if (!isLittleEndian) {
4135           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4136           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4137         }
4138         InVals.push_back(Arg);
4139 
4140         if (GPR_idx != Num_GPR_Regs) {
4141           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4142           FuncInfo->addLiveInAttr(VReg, Flags);
4143           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4144           SDValue Store;
4145 
4146           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4147             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4148                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4149             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4150                                       MachinePointerInfo(&*FuncArg), ObjType);
4151           } else {
4152             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4153             // store the whole register as-is to the parameter save area
4154             // slot.
4155             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4156                                  MachinePointerInfo(&*FuncArg));
4157           }
4158 
4159           MemOps.push_back(Store);
4160         }
4161         // Whether we copied from a register or not, advance the offset
4162         // into the parameter save area by a full doubleword.
4163         ArgOffset += PtrByteSize;
4164         continue;
4165       }
4166 
4167       // The value of the object is its address, which is the address of
4168       // its first stack doubleword.
4169       InVals.push_back(FIN);
4170 
4171       // Store whatever pieces of the object are in registers to memory.
4172       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4173         if (GPR_idx == Num_GPR_Regs)
4174           break;
4175 
4176         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4177         FuncInfo->addLiveInAttr(VReg, Flags);
4178         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4179         SDValue Addr = FIN;
4180         if (j) {
4181           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4182           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4183         }
4184         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4185                                      MachinePointerInfo(&*FuncArg, j));
4186         MemOps.push_back(Store);
4187         ++GPR_idx;
4188       }
4189       ArgOffset += ArgSize;
4190       continue;
4191     }
4192 
4193     switch (ObjectVT.getSimpleVT().SimpleTy) {
4194     default: llvm_unreachable("Unhandled argument type!");
4195     case MVT::i1:
4196     case MVT::i32:
4197     case MVT::i64:
4198       if (Flags.isNest()) {
4199         // The 'nest' parameter, if any, is passed in R11.
4200         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4201         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4202 
4203         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4204           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4205 
4206         break;
4207       }
4208 
4209       // These can be scalar arguments or elements of an integer array type
4210       // passed directly.  Clang may use those instead of "byval" aggregate
4211       // types to avoid forcing arguments to memory unnecessarily.
4212       if (GPR_idx != Num_GPR_Regs) {
4213         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4214         FuncInfo->addLiveInAttr(VReg, Flags);
4215         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4216 
4217         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4218           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4219           // value to MVT::i64 and then truncate to the correct register size.
4220           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4221       } else {
4222         if (CallConv == CallingConv::Fast)
4223           ComputeArgOffset();
4224 
4225         needsLoad = true;
4226         ArgSize = PtrByteSize;
4227       }
4228       if (CallConv != CallingConv::Fast || needsLoad)
4229         ArgOffset += 8;
4230       break;
4231 
4232     case MVT::f32:
4233     case MVT::f64:
4234       // These can be scalar arguments or elements of a float array type
4235       // passed directly.  The latter are used to implement ELFv2 homogenous
4236       // float aggregates.
4237       if (FPR_idx != Num_FPR_Regs) {
4238         unsigned VReg;
4239 
4240         if (ObjectVT == MVT::f32)
4241           VReg = MF.addLiveIn(FPR[FPR_idx],
4242                               Subtarget.hasP8Vector()
4243                                   ? &PPC::VSSRCRegClass
4244                                   : &PPC::F4RCRegClass);
4245         else
4246           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4247                                                 ? &PPC::VSFRCRegClass
4248                                                 : &PPC::F8RCRegClass);
4249 
4250         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4251         ++FPR_idx;
4252       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4253         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4254         // once we support fp <-> gpr moves.
4255 
4256         // This can only ever happen in the presence of f32 array types,
4257         // since otherwise we never run out of FPRs before running out
4258         // of GPRs.
4259         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4260         FuncInfo->addLiveInAttr(VReg, Flags);
4261         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4262 
4263         if (ObjectVT == MVT::f32) {
4264           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4265             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4266                                  DAG.getConstant(32, dl, MVT::i32));
4267           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4268         }
4269 
4270         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4271       } else {
4272         if (CallConv == CallingConv::Fast)
4273           ComputeArgOffset();
4274 
4275         needsLoad = true;
4276       }
4277 
4278       // When passing an array of floats, the array occupies consecutive
4279       // space in the argument area; only round up to the next doubleword
4280       // at the end of the array.  Otherwise, each float takes 8 bytes.
4281       if (CallConv != CallingConv::Fast || needsLoad) {
4282         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4283         ArgOffset += ArgSize;
4284         if (Flags.isInConsecutiveRegsLast())
4285           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4286       }
4287       break;
4288     case MVT::v4f32:
4289     case MVT::v4i32:
4290     case MVT::v8i16:
4291     case MVT::v16i8:
4292     case MVT::v2f64:
4293     case MVT::v2i64:
4294     case MVT::v1i128:
4295     case MVT::f128:
4296       // These can be scalar arguments or elements of a vector array type
4297       // passed directly.  The latter are used to implement ELFv2 homogenous
4298       // vector aggregates.
4299       if (VR_idx != Num_VR_Regs) {
4300         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4301         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4302         ++VR_idx;
4303       } else {
4304         if (CallConv == CallingConv::Fast)
4305           ComputeArgOffset();
4306         needsLoad = true;
4307       }
4308       if (CallConv != CallingConv::Fast || needsLoad)
4309         ArgOffset += 16;
4310       break;
4311     }
4312 
4313     // We need to load the argument to a virtual register if we determined
4314     // above that we ran out of physical registers of the appropriate type.
4315     if (needsLoad) {
4316       if (ObjSize < ArgSize && !isLittleEndian)
4317         CurArgOffset += ArgSize - ObjSize;
4318       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4319       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4320       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4321     }
4322 
4323     InVals.push_back(ArgVal);
4324   }
4325 
4326   // Area that is at least reserved in the caller of this function.
4327   unsigned MinReservedArea;
4328   if (HasParameterArea)
4329     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4330   else
4331     MinReservedArea = LinkageSize;
4332 
4333   // Set the size that is at least reserved in caller of this function.  Tail
4334   // call optimized functions' reserved stack space needs to be aligned so that
4335   // taking the difference between two stack areas will result in an aligned
4336   // stack.
4337   MinReservedArea =
4338       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4339   FuncInfo->setMinReservedArea(MinReservedArea);
4340 
4341   // If the function takes variable number of arguments, make a frame index for
4342   // the start of the first vararg value... for expansion of llvm.va_start.
4343   // On ELFv2ABI spec, it writes:
4344   // C programs that are intended to be *portable* across different compilers
4345   // and architectures must use the header file <stdarg.h> to deal with variable
4346   // argument lists.
4347   if (isVarArg && MFI.hasVAStart()) {
4348     int Depth = ArgOffset;
4349 
4350     FuncInfo->setVarArgsFrameIndex(
4351       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4352     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4353 
4354     // If this function is vararg, store any remaining integer argument regs
4355     // to their spots on the stack so that they may be loaded by dereferencing
4356     // the result of va_next.
4357     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4358          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4359       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4360       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4361       SDValue Store =
4362           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4363       MemOps.push_back(Store);
4364       // Increment the address by four for the next argument to store
4365       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4366       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4367     }
4368   }
4369 
4370   if (!MemOps.empty())
4371     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4372 
4373   return Chain;
4374 }
4375 
4376 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4377 /// adjusted to accommodate the arguments for the tailcall.
4378 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4379                                    unsigned ParamSize) {
4380 
4381   if (!isTailCall) return 0;
4382 
4383   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4384   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4385   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4386   // Remember only if the new adjustment is bigger.
4387   if (SPDiff < FI->getTailCallSPDelta())
4388     FI->setTailCallSPDelta(SPDiff);
4389 
4390   return SPDiff;
4391 }
4392 
4393 static bool isFunctionGlobalAddress(SDValue Callee);
4394 
4395 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4396                               const TargetMachine &TM) {
4397   // It does not make sense to call callsShareTOCBase() with a caller that
4398   // is PC Relative since PC Relative callers do not have a TOC.
4399 #ifndef NDEBUG
4400   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4401   assert(!STICaller->isUsingPCRelativeCalls() &&
4402          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4403 #endif
4404 
4405   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4406   // don't have enough information to determine if the caller and callee share
4407   // the same  TOC base, so we have to pessimistically assume they don't for
4408   // correctness.
4409   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4410   if (!G)
4411     return false;
4412 
4413   const GlobalValue *GV = G->getGlobal();
4414 
4415   // If the callee is preemptable, then the static linker will use a plt-stub
4416   // which saves the toc to the stack, and needs a nop after the call
4417   // instruction to convert to a toc-restore.
4418   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4419     return false;
4420 
4421   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4422   // We may need a TOC restore in the situation where the caller requires a
4423   // valid TOC but the callee is PC Relative and does not.
4424   const Function *F = dyn_cast<Function>(GV);
4425   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4426 
4427   // If we have an Alias we can try to get the function from there.
4428   if (Alias) {
4429     const GlobalObject *GlobalObj = Alias->getBaseObject();
4430     F = dyn_cast<Function>(GlobalObj);
4431   }
4432 
4433   // If we still have no valid function pointer we do not have enough
4434   // information to determine if the callee uses PC Relative calls so we must
4435   // assume that it does.
4436   if (!F)
4437     return false;
4438 
4439   // If the callee uses PC Relative we cannot guarantee that the callee won't
4440   // clobber the TOC of the caller and so we must assume that the two
4441   // functions do not share a TOC base.
4442   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4443   if (STICallee->isUsingPCRelativeCalls())
4444     return false;
4445 
4446   // If the GV is not a strong definition then we need to assume it can be
4447   // replaced by another function at link time. The function that replaces
4448   // it may not share the same TOC as the caller since the callee may be
4449   // replaced by a PC Relative version of the same function.
4450   if (!GV->isStrongDefinitionForLinker())
4451     return false;
4452 
4453   // The medium and large code models are expected to provide a sufficiently
4454   // large TOC to provide all data addressing needs of a module with a
4455   // single TOC.
4456   if (CodeModel::Medium == TM.getCodeModel() ||
4457       CodeModel::Large == TM.getCodeModel())
4458     return true;
4459 
4460   // Any explicitly-specified sections and section prefixes must also match.
4461   // Also, if we're using -ffunction-sections, then each function is always in
4462   // a different section (the same is true for COMDAT functions).
4463   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4464       GV->getSection() != Caller->getSection())
4465     return false;
4466   if (const auto *F = dyn_cast<Function>(GV)) {
4467     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4468       return false;
4469   }
4470 
4471   return true;
4472 }
4473 
4474 static bool
4475 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4476                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4477   assert(Subtarget.is64BitELFABI());
4478 
4479   const unsigned PtrByteSize = 8;
4480   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4481 
4482   static const MCPhysReg GPR[] = {
4483     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4484     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4485   };
4486   static const MCPhysReg VR[] = {
4487     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4488     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4489   };
4490 
4491   const unsigned NumGPRs = array_lengthof(GPR);
4492   const unsigned NumFPRs = 13;
4493   const unsigned NumVRs = array_lengthof(VR);
4494   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4495 
4496   unsigned NumBytes = LinkageSize;
4497   unsigned AvailableFPRs = NumFPRs;
4498   unsigned AvailableVRs = NumVRs;
4499 
4500   for (const ISD::OutputArg& Param : Outs) {
4501     if (Param.Flags.isNest()) continue;
4502 
4503     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4504                                LinkageSize, ParamAreaSize, NumBytes,
4505                                AvailableFPRs, AvailableVRs))
4506       return true;
4507   }
4508   return false;
4509 }
4510 
4511 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4512   if (CB.arg_size() != CallerFn->arg_size())
4513     return false;
4514 
4515   auto CalleeArgIter = CB.arg_begin();
4516   auto CalleeArgEnd = CB.arg_end();
4517   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4518 
4519   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4520     const Value* CalleeArg = *CalleeArgIter;
4521     const Value* CallerArg = &(*CallerArgIter);
4522     if (CalleeArg == CallerArg)
4523       continue;
4524 
4525     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4526     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4527     //      }
4528     // 1st argument of callee is undef and has the same type as caller.
4529     if (CalleeArg->getType() == CallerArg->getType() &&
4530         isa<UndefValue>(CalleeArg))
4531       continue;
4532 
4533     return false;
4534   }
4535 
4536   return true;
4537 }
4538 
4539 // Returns true if TCO is possible between the callers and callees
4540 // calling conventions.
4541 static bool
4542 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4543                                     CallingConv::ID CalleeCC) {
4544   // Tail calls are possible with fastcc and ccc.
4545   auto isTailCallableCC  = [] (CallingConv::ID CC){
4546       return  CC == CallingConv::C || CC == CallingConv::Fast;
4547   };
4548   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4549     return false;
4550 
4551   // We can safely tail call both fastcc and ccc callees from a c calling
4552   // convention caller. If the caller is fastcc, we may have less stack space
4553   // than a non-fastcc caller with the same signature so disable tail-calls in
4554   // that case.
4555   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4556 }
4557 
4558 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4559     SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4560     const SmallVectorImpl<ISD::OutputArg> &Outs,
4561     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4562   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4563 
4564   if (DisableSCO && !TailCallOpt) return false;
4565 
4566   // Variadic argument functions are not supported.
4567   if (isVarArg) return false;
4568 
4569   auto &Caller = DAG.getMachineFunction().getFunction();
4570   // Check that the calling conventions are compatible for tco.
4571   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4572     return false;
4573 
4574   // Caller contains any byval parameter is not supported.
4575   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4576     return false;
4577 
4578   // Callee contains any byval parameter is not supported, too.
4579   // Note: This is a quick work around, because in some cases, e.g.
4580   // caller's stack size > callee's stack size, we are still able to apply
4581   // sibling call optimization. For example, gcc is able to do SCO for caller1
4582   // in the following example, but not for caller2.
4583   //   struct test {
4584   //     long int a;
4585   //     char ary[56];
4586   //   } gTest;
4587   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4588   //     b->a = v.a;
4589   //     return 0;
4590   //   }
4591   //   void caller1(struct test a, struct test c, struct test *b) {
4592   //     callee(gTest, b); }
4593   //   void caller2(struct test *b) { callee(gTest, b); }
4594   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4595     return false;
4596 
4597   // If callee and caller use different calling conventions, we cannot pass
4598   // parameters on stack since offsets for the parameter area may be different.
4599   if (Caller.getCallingConv() != CalleeCC &&
4600       needStackSlotPassParameters(Subtarget, Outs))
4601     return false;
4602 
4603   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4604   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4605   // callee potentially have different TOC bases then we cannot tail call since
4606   // we need to restore the TOC pointer after the call.
4607   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4608   // We cannot guarantee this for indirect calls or calls to external functions.
4609   // When PC-Relative addressing is used, the concept of the TOC is no longer
4610   // applicable so this check is not required.
4611   // Check first for indirect calls.
4612   if (!Subtarget.isUsingPCRelativeCalls() &&
4613       !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4614     return false;
4615 
4616   // Check if we share the TOC base.
4617   if (!Subtarget.isUsingPCRelativeCalls() &&
4618       !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4619     return false;
4620 
4621   // TCO allows altering callee ABI, so we don't have to check further.
4622   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4623     return true;
4624 
4625   if (DisableSCO) return false;
4626 
4627   // If callee use the same argument list that caller is using, then we can
4628   // apply SCO on this case. If it is not, then we need to check if callee needs
4629   // stack for passing arguments.
4630   // PC Relative tail calls may not have a CallBase.
4631   // If there is no CallBase we cannot verify if we have the same argument
4632   // list so assume that we don't have the same argument list.
4633   if (CB && !hasSameArgumentList(&Caller, *CB) &&
4634       needStackSlotPassParameters(Subtarget, Outs))
4635     return false;
4636   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4637     return false;
4638 
4639   return true;
4640 }
4641 
4642 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4643 /// for tail call optimization. Targets which want to do tail call
4644 /// optimization should implement this function.
4645 bool
4646 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4647                                                      CallingConv::ID CalleeCC,
4648                                                      bool isVarArg,
4649                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4650                                                      SelectionDAG& DAG) const {
4651   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4652     return false;
4653 
4654   // Variable argument functions are not supported.
4655   if (isVarArg)
4656     return false;
4657 
4658   MachineFunction &MF = DAG.getMachineFunction();
4659   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4660   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4661     // Functions containing by val parameters are not supported.
4662     for (unsigned i = 0; i != Ins.size(); i++) {
4663        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4664        if (Flags.isByVal()) return false;
4665     }
4666 
4667     // Non-PIC/GOT tail calls are supported.
4668     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4669       return true;
4670 
4671     // At the moment we can only do local tail calls (in same module, hidden
4672     // or protected) if we are generating PIC.
4673     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4674       return G->getGlobal()->hasHiddenVisibility()
4675           || G->getGlobal()->hasProtectedVisibility();
4676   }
4677 
4678   return false;
4679 }
4680 
4681 /// isCallCompatibleAddress - Return the immediate to use if the specified
4682 /// 32-bit value is representable in the immediate field of a BxA instruction.
4683 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4684   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4685   if (!C) return nullptr;
4686 
4687   int Addr = C->getZExtValue();
4688   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4689       SignExtend32<26>(Addr) != Addr)
4690     return nullptr;  // Top 6 bits have to be sext of immediate.
4691 
4692   return DAG
4693       .getConstant(
4694           (int)C->getZExtValue() >> 2, SDLoc(Op),
4695           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4696       .getNode();
4697 }
4698 
4699 namespace {
4700 
4701 struct TailCallArgumentInfo {
4702   SDValue Arg;
4703   SDValue FrameIdxOp;
4704   int FrameIdx = 0;
4705 
4706   TailCallArgumentInfo() = default;
4707 };
4708 
4709 } // end anonymous namespace
4710 
4711 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4712 static void StoreTailCallArgumentsToStackSlot(
4713     SelectionDAG &DAG, SDValue Chain,
4714     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4715     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4716   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4717     SDValue Arg = TailCallArgs[i].Arg;
4718     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4719     int FI = TailCallArgs[i].FrameIdx;
4720     // Store relative to framepointer.
4721     MemOpChains.push_back(DAG.getStore(
4722         Chain, dl, Arg, FIN,
4723         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4724   }
4725 }
4726 
4727 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4728 /// the appropriate stack slot for the tail call optimized function call.
4729 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4730                                              SDValue OldRetAddr, SDValue OldFP,
4731                                              int SPDiff, const SDLoc &dl) {
4732   if (SPDiff) {
4733     // Calculate the new stack slot for the return address.
4734     MachineFunction &MF = DAG.getMachineFunction();
4735     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4736     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4737     bool isPPC64 = Subtarget.isPPC64();
4738     int SlotSize = isPPC64 ? 8 : 4;
4739     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4740     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4741                                                          NewRetAddrLoc, true);
4742     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4743     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4744     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4745                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4746   }
4747   return Chain;
4748 }
4749 
4750 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4751 /// the position of the argument.
4752 static void
4753 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4754                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4755                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4756   int Offset = ArgOffset + SPDiff;
4757   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4758   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4759   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4760   SDValue FIN = DAG.getFrameIndex(FI, VT);
4761   TailCallArgumentInfo Info;
4762   Info.Arg = Arg;
4763   Info.FrameIdxOp = FIN;
4764   Info.FrameIdx = FI;
4765   TailCallArguments.push_back(Info);
4766 }
4767 
4768 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4769 /// stack slot. Returns the chain as result and the loaded frame pointers in
4770 /// LROpOut/FPOpout. Used when tail calling.
4771 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4772     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4773     SDValue &FPOpOut, const SDLoc &dl) const {
4774   if (SPDiff) {
4775     // Load the LR and FP stack slot for later adjusting.
4776     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4777     LROpOut = getReturnAddrFrameIndex(DAG);
4778     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4779     Chain = SDValue(LROpOut.getNode(), 1);
4780   }
4781   return Chain;
4782 }
4783 
4784 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4785 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4786 /// specified by the specific parameter attribute. The copy will be passed as
4787 /// a byval function parameter.
4788 /// Sometimes what we are copying is the end of a larger object, the part that
4789 /// does not fit in registers.
4790 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4791                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4792                                          SelectionDAG &DAG, const SDLoc &dl) {
4793   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4794   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
4795                        Flags.getNonZeroByValAlign(), false, false, false,
4796                        MachinePointerInfo(), MachinePointerInfo());
4797 }
4798 
4799 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4800 /// tail calls.
4801 static void LowerMemOpCallTo(
4802     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4803     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4804     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4805     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4806   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4807   if (!isTailCall) {
4808     if (isVector) {
4809       SDValue StackPtr;
4810       if (isPPC64)
4811         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4812       else
4813         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4814       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4815                            DAG.getConstant(ArgOffset, dl, PtrVT));
4816     }
4817     MemOpChains.push_back(
4818         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4819     // Calculate and remember argument location.
4820   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4821                                   TailCallArguments);
4822 }
4823 
4824 static void
4825 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4826                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4827                 SDValue FPOp,
4828                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4829   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4830   // might overwrite each other in case of tail call optimization.
4831   SmallVector<SDValue, 8> MemOpChains2;
4832   // Do not flag preceding copytoreg stuff together with the following stuff.
4833   InFlag = SDValue();
4834   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4835                                     MemOpChains2, dl);
4836   if (!MemOpChains2.empty())
4837     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4838 
4839   // Store the return address to the appropriate stack slot.
4840   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4841 
4842   // Emit callseq_end just before tailcall node.
4843   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4844                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4845   InFlag = Chain.getValue(1);
4846 }
4847 
4848 // Is this global address that of a function that can be called by name? (as
4849 // opposed to something that must hold a descriptor for an indirect call).
4850 static bool isFunctionGlobalAddress(SDValue Callee) {
4851   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4852     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4853         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4854       return false;
4855 
4856     return G->getGlobal()->getValueType()->isFunctionTy();
4857   }
4858 
4859   return false;
4860 }
4861 
4862 SDValue PPCTargetLowering::LowerCallResult(
4863     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4864     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4865     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4866   SmallVector<CCValAssign, 16> RVLocs;
4867   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4868                     *DAG.getContext());
4869 
4870   CCRetInfo.AnalyzeCallResult(
4871       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
4872                ? RetCC_PPC_Cold
4873                : RetCC_PPC);
4874 
4875   // Copy all of the result registers out of their specified physreg.
4876   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4877     CCValAssign &VA = RVLocs[i];
4878     assert(VA.isRegLoc() && "Can only return in registers!");
4879 
4880     SDValue Val;
4881 
4882     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
4883       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4884                                       InFlag);
4885       Chain = Lo.getValue(1);
4886       InFlag = Lo.getValue(2);
4887       VA = RVLocs[++i]; // skip ahead to next loc
4888       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4889                                       InFlag);
4890       Chain = Hi.getValue(1);
4891       InFlag = Hi.getValue(2);
4892       if (!Subtarget.isLittleEndian())
4893         std::swap (Lo, Hi);
4894       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
4895     } else {
4896       Val = DAG.getCopyFromReg(Chain, dl,
4897                                VA.getLocReg(), VA.getLocVT(), InFlag);
4898       Chain = Val.getValue(1);
4899       InFlag = Val.getValue(2);
4900     }
4901 
4902     switch (VA.getLocInfo()) {
4903     default: llvm_unreachable("Unknown loc info!");
4904     case CCValAssign::Full: break;
4905     case CCValAssign::AExt:
4906       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4907       break;
4908     case CCValAssign::ZExt:
4909       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4910                         DAG.getValueType(VA.getValVT()));
4911       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4912       break;
4913     case CCValAssign::SExt:
4914       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4915                         DAG.getValueType(VA.getValVT()));
4916       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4917       break;
4918     }
4919 
4920     InVals.push_back(Val);
4921   }
4922 
4923   return Chain;
4924 }
4925 
4926 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
4927                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
4928   // PatchPoint calls are not indirect.
4929   if (isPatchPoint)
4930     return false;
4931 
4932   if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
4933     return false;
4934 
4935   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
4936   // becuase the immediate function pointer points to a descriptor instead of
4937   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
4938   // pointer immediate points to the global entry point, while the BLA would
4939   // need to jump to the local entry point (see rL211174).
4940   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
4941       isBLACompatibleAddress(Callee, DAG))
4942     return false;
4943 
4944   return true;
4945 }
4946 
4947 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
4948 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
4949   return Subtarget.isAIXABI() ||
4950          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
4951 }
4952 
4953 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
4954                               const Function &Caller,
4955                               const SDValue &Callee,
4956                               const PPCSubtarget &Subtarget,
4957                               const TargetMachine &TM) {
4958   if (CFlags.IsTailCall)
4959     return PPCISD::TC_RETURN;
4960 
4961   // This is a call through a function pointer.
4962   if (CFlags.IsIndirect) {
4963     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
4964     // indirect calls. The save of the caller's TOC pointer to the stack will be
4965     // inserted into the DAG as part of call lowering. The restore of the TOC
4966     // pointer is modeled by using a pseudo instruction for the call opcode that
4967     // represents the 2 instruction sequence of an indirect branch and link,
4968     // immediately followed by a load of the TOC pointer from the the stack save
4969     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
4970     // as it is not saved or used.
4971     return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
4972                                                : PPCISD::BCTRL;
4973   }
4974 
4975   if (Subtarget.isUsingPCRelativeCalls()) {
4976     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
4977     return PPCISD::CALL_NOTOC;
4978   }
4979 
4980   // The ABIs that maintain a TOC pointer accross calls need to have a nop
4981   // immediately following the call instruction if the caller and callee may
4982   // have different TOC bases. At link time if the linker determines the calls
4983   // may not share a TOC base, the call is redirected to a trampoline inserted
4984   // by the linker. The trampoline will (among other things) save the callers
4985   // TOC pointer at an ABI designated offset in the linkage area and the linker
4986   // will rewrite the nop to be a load of the TOC pointer from the linkage area
4987   // into gpr2.
4988   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
4989     return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
4990                                                   : PPCISD::CALL_NOP;
4991 
4992   return PPCISD::CALL;
4993 }
4994 
4995 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
4996                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
4997   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
4998     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
4999       return SDValue(Dest, 0);
5000 
5001   // Returns true if the callee is local, and false otherwise.
5002   auto isLocalCallee = [&]() {
5003     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5004     const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5005     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5006 
5007     return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5008            !dyn_cast_or_null<GlobalIFunc>(GV);
5009   };
5010 
5011   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5012   // a static relocation model causes some versions of GNU LD (2.17.50, at
5013   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5014   // built with secure-PLT.
5015   bool UsePlt =
5016       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5017       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5018 
5019   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5020     const TargetMachine &TM = Subtarget.getTargetMachine();
5021     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5022     MCSymbolXCOFF *S =
5023         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5024 
5025     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5026     return DAG.getMCSymbol(S, PtrVT);
5027   };
5028 
5029   if (isFunctionGlobalAddress(Callee)) {
5030     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5031 
5032     if (Subtarget.isAIXABI()) {
5033       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5034       return getAIXFuncEntryPointSymbolSDNode(GV);
5035     }
5036     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5037                                       UsePlt ? PPCII::MO_PLT : 0);
5038   }
5039 
5040   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5041     const char *SymName = S->getSymbol();
5042     if (Subtarget.isAIXABI()) {
5043       // If there exists a user-declared function whose name is the same as the
5044       // ExternalSymbol's, then we pick up the user-declared version.
5045       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5046       if (const Function *F =
5047               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5048         return getAIXFuncEntryPointSymbolSDNode(F);
5049 
5050       // On AIX, direct function calls reference the symbol for the function's
5051       // entry point, which is named by prepending a "." before the function's
5052       // C-linkage name. A Qualname is returned here because an external
5053       // function entry point is a csect with XTY_ER property.
5054       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5055         auto &Context = DAG.getMachineFunction().getMMI().getContext();
5056         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5057             (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5058             XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5059         return Sec->getQualNameSymbol();
5060       };
5061 
5062       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5063     }
5064     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5065                                        UsePlt ? PPCII::MO_PLT : 0);
5066   }
5067 
5068   // No transformation needed.
5069   assert(Callee.getNode() && "What no callee?");
5070   return Callee;
5071 }
5072 
5073 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5074   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5075          "Expected a CALLSEQ_STARTSDNode.");
5076 
5077   // The last operand is the chain, except when the node has glue. If the node
5078   // has glue, then the last operand is the glue, and the chain is the second
5079   // last operand.
5080   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5081   if (LastValue.getValueType() != MVT::Glue)
5082     return LastValue;
5083 
5084   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5085 }
5086 
5087 // Creates the node that moves a functions address into the count register
5088 // to prepare for an indirect call instruction.
5089 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5090                                 SDValue &Glue, SDValue &Chain,
5091                                 const SDLoc &dl) {
5092   SDValue MTCTROps[] = {Chain, Callee, Glue};
5093   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5094   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5095                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5096   // The glue is the second value produced.
5097   Glue = Chain.getValue(1);
5098 }
5099 
5100 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5101                                           SDValue &Glue, SDValue &Chain,
5102                                           SDValue CallSeqStart,
5103                                           const CallBase *CB, const SDLoc &dl,
5104                                           bool hasNest,
5105                                           const PPCSubtarget &Subtarget) {
5106   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5107   // entry point, but to the function descriptor (the function entry point
5108   // address is part of the function descriptor though).
5109   // The function descriptor is a three doubleword structure with the
5110   // following fields: function entry point, TOC base address and
5111   // environment pointer.
5112   // Thus for a call through a function pointer, the following actions need
5113   // to be performed:
5114   //   1. Save the TOC of the caller in the TOC save area of its stack
5115   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5116   //   2. Load the address of the function entry point from the function
5117   //      descriptor.
5118   //   3. Load the TOC of the callee from the function descriptor into r2.
5119   //   4. Load the environment pointer from the function descriptor into
5120   //      r11.
5121   //   5. Branch to the function entry point address.
5122   //   6. On return of the callee, the TOC of the caller needs to be
5123   //      restored (this is done in FinishCall()).
5124   //
5125   // The loads are scheduled at the beginning of the call sequence, and the
5126   // register copies are flagged together to ensure that no other
5127   // operations can be scheduled in between. E.g. without flagging the
5128   // copies together, a TOC access in the caller could be scheduled between
5129   // the assignment of the callee TOC and the branch to the callee, which leads
5130   // to incorrect code.
5131 
5132   // Start by loading the function address from the descriptor.
5133   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5134   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5135                       ? (MachineMemOperand::MODereferenceable |
5136                          MachineMemOperand::MOInvariant)
5137                       : MachineMemOperand::MONone;
5138 
5139   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5140 
5141   // Registers used in building the DAG.
5142   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5143   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5144 
5145   // Offsets of descriptor members.
5146   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5147   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5148 
5149   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5150   const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5151 
5152   // One load for the functions entry point address.
5153   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5154                                     Alignment, MMOFlags);
5155 
5156   // One for loading the TOC anchor for the module that contains the called
5157   // function.
5158   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5159   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5160   SDValue TOCPtr =
5161       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5162                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5163 
5164   // One for loading the environment pointer.
5165   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5166   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5167   SDValue LoadEnvPtr =
5168       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5169                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5170 
5171 
5172   // Then copy the newly loaded TOC anchor to the TOC pointer.
5173   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5174   Chain = TOCVal.getValue(0);
5175   Glue = TOCVal.getValue(1);
5176 
5177   // If the function call has an explicit 'nest' parameter, it takes the
5178   // place of the environment pointer.
5179   assert((!hasNest || !Subtarget.isAIXABI()) &&
5180          "Nest parameter is not supported on AIX.");
5181   if (!hasNest) {
5182     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5183     Chain = EnvVal.getValue(0);
5184     Glue = EnvVal.getValue(1);
5185   }
5186 
5187   // The rest of the indirect call sequence is the same as the non-descriptor
5188   // DAG.
5189   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5190 }
5191 
5192 static void
5193 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5194                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5195                   SelectionDAG &DAG,
5196                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5197                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5198                   const PPCSubtarget &Subtarget) {
5199   const bool IsPPC64 = Subtarget.isPPC64();
5200   // MVT for a general purpose register.
5201   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5202 
5203   // First operand is always the chain.
5204   Ops.push_back(Chain);
5205 
5206   // If it's a direct call pass the callee as the second operand.
5207   if (!CFlags.IsIndirect)
5208     Ops.push_back(Callee);
5209   else {
5210     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5211 
5212     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5213     // on the stack (this would have been done in `LowerCall_64SVR4` or
5214     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5215     // represents both the indirect branch and a load that restores the TOC
5216     // pointer from the linkage area. The operand for the TOC restore is an add
5217     // of the TOC save offset to the stack pointer. This must be the second
5218     // operand: after the chain input but before any other variadic arguments.
5219     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5220     // saved or used.
5221     if (isTOCSaveRestoreRequired(Subtarget)) {
5222       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5223 
5224       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5225       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5226       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5227       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5228       Ops.push_back(AddTOC);
5229     }
5230 
5231     // Add the register used for the environment pointer.
5232     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5233       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5234                                     RegVT));
5235 
5236 
5237     // Add CTR register as callee so a bctr can be emitted later.
5238     if (CFlags.IsTailCall)
5239       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5240   }
5241 
5242   // If this is a tail call add stack pointer delta.
5243   if (CFlags.IsTailCall)
5244     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5245 
5246   // Add argument registers to the end of the list so that they are known live
5247   // into the call.
5248   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5249     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5250                                   RegsToPass[i].second.getValueType()));
5251 
5252   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5253   // no way to mark dependencies as implicit here.
5254   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5255   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5256        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5257     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5258 
5259   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5260   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5261     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5262 
5263   // Add a register mask operand representing the call-preserved registers.
5264   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5265   const uint32_t *Mask =
5266       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5267   assert(Mask && "Missing call preserved mask for calling convention");
5268   Ops.push_back(DAG.getRegisterMask(Mask));
5269 
5270   // If the glue is valid, it is the last operand.
5271   if (Glue.getNode())
5272     Ops.push_back(Glue);
5273 }
5274 
5275 SDValue PPCTargetLowering::FinishCall(
5276     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5277     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5278     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5279     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5280     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5281 
5282   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5283       Subtarget.isAIXABI())
5284     setUsesTOCBasePtr(DAG);
5285 
5286   unsigned CallOpc =
5287       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5288                     Subtarget, DAG.getTarget());
5289 
5290   if (!CFlags.IsIndirect)
5291     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5292   else if (Subtarget.usesFunctionDescriptors())
5293     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5294                                   dl, CFlags.HasNest, Subtarget);
5295   else
5296     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5297 
5298   // Build the operand list for the call instruction.
5299   SmallVector<SDValue, 8> Ops;
5300   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5301                     SPDiff, Subtarget);
5302 
5303   // Emit tail call.
5304   if (CFlags.IsTailCall) {
5305     // Indirect tail call when using PC Relative calls do not have the same
5306     // constraints.
5307     assert(((Callee.getOpcode() == ISD::Register &&
5308              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5309             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5310             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5311             isa<ConstantSDNode>(Callee) ||
5312             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5313            "Expecting a global address, external symbol, absolute value, "
5314            "register or an indirect tail call when PC Relative calls are "
5315            "used.");
5316     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5317     assert(CallOpc == PPCISD::TC_RETURN &&
5318            "Unexpected call opcode for a tail call.");
5319     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5320     return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5321   }
5322 
5323   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5324   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5325   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5326   Glue = Chain.getValue(1);
5327 
5328   // When performing tail call optimization the callee pops its arguments off
5329   // the stack. Account for this here so these bytes can be pushed back on in
5330   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5331   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5332                          getTargetMachine().Options.GuaranteedTailCallOpt)
5333                             ? NumBytes
5334                             : 0;
5335 
5336   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5337                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5338                              Glue, dl);
5339   Glue = Chain.getValue(1);
5340 
5341   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5342                          DAG, InVals);
5343 }
5344 
5345 SDValue
5346 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5347                              SmallVectorImpl<SDValue> &InVals) const {
5348   SelectionDAG &DAG                     = CLI.DAG;
5349   SDLoc &dl                             = CLI.DL;
5350   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5351   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5352   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5353   SDValue Chain                         = CLI.Chain;
5354   SDValue Callee                        = CLI.Callee;
5355   bool &isTailCall                      = CLI.IsTailCall;
5356   CallingConv::ID CallConv              = CLI.CallConv;
5357   bool isVarArg                         = CLI.IsVarArg;
5358   bool isPatchPoint                     = CLI.IsPatchPoint;
5359   const CallBase *CB                    = CLI.CB;
5360 
5361   if (isTailCall) {
5362     if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5363       isTailCall = false;
5364     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5365       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5366           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5367     else
5368       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5369                                                      Ins, DAG);
5370     if (isTailCall) {
5371       ++NumTailCalls;
5372       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5373         ++NumSiblingCalls;
5374 
5375       // PC Relative calls no longer guarantee that the callee is a Global
5376       // Address Node. The callee could be an indirect tail call in which
5377       // case the SDValue for the callee could be a load (to load the address
5378       // of a function pointer) or it may be a register copy (to move the
5379       // address of the callee from a function parameter into a virtual
5380       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5381       assert((Subtarget.isUsingPCRelativeCalls() ||
5382               isa<GlobalAddressSDNode>(Callee)) &&
5383              "Callee should be an llvm::Function object.");
5384 
5385       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5386                         << "\nTCO callee: ");
5387       LLVM_DEBUG(Callee.dump());
5388     }
5389   }
5390 
5391   if (!isTailCall && CB && CB->isMustTailCall())
5392     report_fatal_error("failed to perform tail call elimination on a call "
5393                        "site marked musttail");
5394 
5395   // When long calls (i.e. indirect calls) are always used, calls are always
5396   // made via function pointer. If we have a function name, first translate it
5397   // into a pointer.
5398   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5399       !isTailCall)
5400     Callee = LowerGlobalAddress(Callee, DAG);
5401 
5402   CallFlags CFlags(
5403       CallConv, isTailCall, isVarArg, isPatchPoint,
5404       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5405       // hasNest
5406       Subtarget.is64BitELFABI() &&
5407           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5408       CLI.NoMerge);
5409 
5410   if (Subtarget.isAIXABI())
5411     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5412                          InVals, CB);
5413 
5414   assert(Subtarget.isSVR4ABI());
5415   if (Subtarget.isPPC64())
5416     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5417                             InVals, CB);
5418   return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5419                           InVals, CB);
5420 }
5421 
5422 SDValue PPCTargetLowering::LowerCall_32SVR4(
5423     SDValue Chain, SDValue Callee, CallFlags CFlags,
5424     const SmallVectorImpl<ISD::OutputArg> &Outs,
5425     const SmallVectorImpl<SDValue> &OutVals,
5426     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5427     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5428     const CallBase *CB) const {
5429   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5430   // of the 32-bit SVR4 ABI stack frame layout.
5431 
5432   const CallingConv::ID CallConv = CFlags.CallConv;
5433   const bool IsVarArg = CFlags.IsVarArg;
5434   const bool IsTailCall = CFlags.IsTailCall;
5435 
5436   assert((CallConv == CallingConv::C ||
5437           CallConv == CallingConv::Cold ||
5438           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5439 
5440   const Align PtrAlign(4);
5441 
5442   MachineFunction &MF = DAG.getMachineFunction();
5443 
5444   // Mark this function as potentially containing a function that contains a
5445   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5446   // and restoring the callers stack pointer in this functions epilog. This is
5447   // done because by tail calling the called function might overwrite the value
5448   // in this function's (MF) stack pointer stack slot 0(SP).
5449   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5450       CallConv == CallingConv::Fast)
5451     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5452 
5453   // Count how many bytes are to be pushed on the stack, including the linkage
5454   // area, parameter list area and the part of the local variable space which
5455   // contains copies of aggregates which are passed by value.
5456 
5457   // Assign locations to all of the outgoing arguments.
5458   SmallVector<CCValAssign, 16> ArgLocs;
5459   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5460 
5461   // Reserve space for the linkage area on the stack.
5462   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5463                        PtrAlign);
5464   if (useSoftFloat())
5465     CCInfo.PreAnalyzeCallOperands(Outs);
5466 
5467   if (IsVarArg) {
5468     // Handle fixed and variable vector arguments differently.
5469     // Fixed vector arguments go into registers as long as registers are
5470     // available. Variable vector arguments always go into memory.
5471     unsigned NumArgs = Outs.size();
5472 
5473     for (unsigned i = 0; i != NumArgs; ++i) {
5474       MVT ArgVT = Outs[i].VT;
5475       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5476       bool Result;
5477 
5478       if (Outs[i].IsFixed) {
5479         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5480                                CCInfo);
5481       } else {
5482         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5483                                       ArgFlags, CCInfo);
5484       }
5485 
5486       if (Result) {
5487 #ifndef NDEBUG
5488         errs() << "Call operand #" << i << " has unhandled type "
5489              << EVT(ArgVT).getEVTString() << "\n";
5490 #endif
5491         llvm_unreachable(nullptr);
5492       }
5493     }
5494   } else {
5495     // All arguments are treated the same.
5496     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5497   }
5498   CCInfo.clearWasPPCF128();
5499 
5500   // Assign locations to all of the outgoing aggregate by value arguments.
5501   SmallVector<CCValAssign, 16> ByValArgLocs;
5502   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5503 
5504   // Reserve stack space for the allocations in CCInfo.
5505   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5506 
5507   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5508 
5509   // Size of the linkage area, parameter list area and the part of the local
5510   // space variable where copies of aggregates which are passed by value are
5511   // stored.
5512   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5513 
5514   // Calculate by how many bytes the stack has to be adjusted in case of tail
5515   // call optimization.
5516   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5517 
5518   // Adjust the stack pointer for the new arguments...
5519   // These operations are automatically eliminated by the prolog/epilog pass
5520   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5521   SDValue CallSeqStart = Chain;
5522 
5523   // Load the return address and frame pointer so it can be moved somewhere else
5524   // later.
5525   SDValue LROp, FPOp;
5526   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5527 
5528   // Set up a copy of the stack pointer for use loading and storing any
5529   // arguments that may not fit in the registers available for argument
5530   // passing.
5531   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5532 
5533   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5534   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5535   SmallVector<SDValue, 8> MemOpChains;
5536 
5537   bool seenFloatArg = false;
5538   // Walk the register/memloc assignments, inserting copies/loads.
5539   // i - Tracks the index into the list of registers allocated for the call
5540   // RealArgIdx - Tracks the index into the list of actual function arguments
5541   // j - Tracks the index into the list of byval arguments
5542   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5543        i != e;
5544        ++i, ++RealArgIdx) {
5545     CCValAssign &VA = ArgLocs[i];
5546     SDValue Arg = OutVals[RealArgIdx];
5547     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5548 
5549     if (Flags.isByVal()) {
5550       // Argument is an aggregate which is passed by value, thus we need to
5551       // create a copy of it in the local variable space of the current stack
5552       // frame (which is the stack frame of the caller) and pass the address of
5553       // this copy to the callee.
5554       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5555       CCValAssign &ByValVA = ByValArgLocs[j++];
5556       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5557 
5558       // Memory reserved in the local variable space of the callers stack frame.
5559       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5560 
5561       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5562       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5563                            StackPtr, PtrOff);
5564 
5565       // Create a copy of the argument in the local area of the current
5566       // stack frame.
5567       SDValue MemcpyCall =
5568         CreateCopyOfByValArgument(Arg, PtrOff,
5569                                   CallSeqStart.getNode()->getOperand(0),
5570                                   Flags, DAG, dl);
5571 
5572       // This must go outside the CALLSEQ_START..END.
5573       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5574                                                      SDLoc(MemcpyCall));
5575       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5576                              NewCallSeqStart.getNode());
5577       Chain = CallSeqStart = NewCallSeqStart;
5578 
5579       // Pass the address of the aggregate copy on the stack either in a
5580       // physical register or in the parameter list area of the current stack
5581       // frame to the callee.
5582       Arg = PtrOff;
5583     }
5584 
5585     // When useCRBits() is true, there can be i1 arguments.
5586     // It is because getRegisterType(MVT::i1) => MVT::i1,
5587     // and for other integer types getRegisterType() => MVT::i32.
5588     // Extend i1 and ensure callee will get i32.
5589     if (Arg.getValueType() == MVT::i1)
5590       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5591                         dl, MVT::i32, Arg);
5592 
5593     if (VA.isRegLoc()) {
5594       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5595       // Put argument in a physical register.
5596       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5597         bool IsLE = Subtarget.isLittleEndian();
5598         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5599                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5600         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5601         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5602                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5603         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5604                              SVal.getValue(0)));
5605       } else
5606         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5607     } else {
5608       // Put argument in the parameter list area of the current stack frame.
5609       assert(VA.isMemLoc());
5610       unsigned LocMemOffset = VA.getLocMemOffset();
5611 
5612       if (!IsTailCall) {
5613         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5614         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5615                              StackPtr, PtrOff);
5616 
5617         MemOpChains.push_back(
5618             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5619       } else {
5620         // Calculate and remember argument location.
5621         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5622                                  TailCallArguments);
5623       }
5624     }
5625   }
5626 
5627   if (!MemOpChains.empty())
5628     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5629 
5630   // Build a sequence of copy-to-reg nodes chained together with token chain
5631   // and flag operands which copy the outgoing args into the appropriate regs.
5632   SDValue InFlag;
5633   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5634     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5635                              RegsToPass[i].second, InFlag);
5636     InFlag = Chain.getValue(1);
5637   }
5638 
5639   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5640   // registers.
5641   if (IsVarArg) {
5642     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5643     SDValue Ops[] = { Chain, InFlag };
5644 
5645     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5646                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5647 
5648     InFlag = Chain.getValue(1);
5649   }
5650 
5651   if (IsTailCall)
5652     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5653                     TailCallArguments);
5654 
5655   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5656                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
5657 }
5658 
5659 // Copy an argument into memory, being careful to do this outside the
5660 // call sequence for the call to which the argument belongs.
5661 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5662     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5663     SelectionDAG &DAG, const SDLoc &dl) const {
5664   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5665                         CallSeqStart.getNode()->getOperand(0),
5666                         Flags, DAG, dl);
5667   // The MEMCPY must go outside the CALLSEQ_START..END.
5668   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5669   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5670                                                  SDLoc(MemcpyCall));
5671   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5672                          NewCallSeqStart.getNode());
5673   return NewCallSeqStart;
5674 }
5675 
5676 SDValue PPCTargetLowering::LowerCall_64SVR4(
5677     SDValue Chain, SDValue Callee, CallFlags CFlags,
5678     const SmallVectorImpl<ISD::OutputArg> &Outs,
5679     const SmallVectorImpl<SDValue> &OutVals,
5680     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5681     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5682     const CallBase *CB) const {
5683   bool isELFv2ABI = Subtarget.isELFv2ABI();
5684   bool isLittleEndian = Subtarget.isLittleEndian();
5685   unsigned NumOps = Outs.size();
5686   bool IsSibCall = false;
5687   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5688 
5689   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5690   unsigned PtrByteSize = 8;
5691 
5692   MachineFunction &MF = DAG.getMachineFunction();
5693 
5694   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5695     IsSibCall = true;
5696 
5697   // Mark this function as potentially containing a function that contains a
5698   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5699   // and restoring the callers stack pointer in this functions epilog. This is
5700   // done because by tail calling the called function might overwrite the value
5701   // in this function's (MF) stack pointer stack slot 0(SP).
5702   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5703     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5704 
5705   assert(!(IsFastCall && CFlags.IsVarArg) &&
5706          "fastcc not supported on varargs functions");
5707 
5708   // Count how many bytes are to be pushed on the stack, including the linkage
5709   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5710   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5711   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5712   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5713   unsigned NumBytes = LinkageSize;
5714   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5715 
5716   static const MCPhysReg GPR[] = {
5717     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5718     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5719   };
5720   static const MCPhysReg VR[] = {
5721     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5722     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5723   };
5724 
5725   const unsigned NumGPRs = array_lengthof(GPR);
5726   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5727   const unsigned NumVRs  = array_lengthof(VR);
5728 
5729   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5730   // can be passed to the callee in registers.
5731   // For the fast calling convention, there is another check below.
5732   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5733   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5734   if (!HasParameterArea) {
5735     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5736     unsigned AvailableFPRs = NumFPRs;
5737     unsigned AvailableVRs = NumVRs;
5738     unsigned NumBytesTmp = NumBytes;
5739     for (unsigned i = 0; i != NumOps; ++i) {
5740       if (Outs[i].Flags.isNest()) continue;
5741       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5742                                  PtrByteSize, LinkageSize, ParamAreaSize,
5743                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
5744         HasParameterArea = true;
5745     }
5746   }
5747 
5748   // When using the fast calling convention, we don't provide backing for
5749   // arguments that will be in registers.
5750   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5751 
5752   // Avoid allocating parameter area for fastcc functions if all the arguments
5753   // can be passed in the registers.
5754   if (IsFastCall)
5755     HasParameterArea = false;
5756 
5757   // Add up all the space actually used.
5758   for (unsigned i = 0; i != NumOps; ++i) {
5759     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5760     EVT ArgVT = Outs[i].VT;
5761     EVT OrigVT = Outs[i].ArgVT;
5762 
5763     if (Flags.isNest())
5764       continue;
5765 
5766     if (IsFastCall) {
5767       if (Flags.isByVal()) {
5768         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5769         if (NumGPRsUsed > NumGPRs)
5770           HasParameterArea = true;
5771       } else {
5772         switch (ArgVT.getSimpleVT().SimpleTy) {
5773         default: llvm_unreachable("Unexpected ValueType for argument!");
5774         case MVT::i1:
5775         case MVT::i32:
5776         case MVT::i64:
5777           if (++NumGPRsUsed <= NumGPRs)
5778             continue;
5779           break;
5780         case MVT::v4i32:
5781         case MVT::v8i16:
5782         case MVT::v16i8:
5783         case MVT::v2f64:
5784         case MVT::v2i64:
5785         case MVT::v1i128:
5786         case MVT::f128:
5787           if (++NumVRsUsed <= NumVRs)
5788             continue;
5789           break;
5790         case MVT::v4f32:
5791           if (++NumVRsUsed <= NumVRs)
5792             continue;
5793           break;
5794         case MVT::f32:
5795         case MVT::f64:
5796           if (++NumFPRsUsed <= NumFPRs)
5797             continue;
5798           break;
5799         }
5800         HasParameterArea = true;
5801       }
5802     }
5803 
5804     /* Respect alignment of argument on the stack.  */
5805     auto Alignement =
5806         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5807     NumBytes = alignTo(NumBytes, Alignement);
5808 
5809     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5810     if (Flags.isInConsecutiveRegsLast())
5811       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5812   }
5813 
5814   unsigned NumBytesActuallyUsed = NumBytes;
5815 
5816   // In the old ELFv1 ABI,
5817   // the prolog code of the callee may store up to 8 GPR argument registers to
5818   // the stack, allowing va_start to index over them in memory if its varargs.
5819   // Because we cannot tell if this is needed on the caller side, we have to
5820   // conservatively assume that it is needed.  As such, make sure we have at
5821   // least enough stack space for the caller to store the 8 GPRs.
5822   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5823   // really requires memory operands, e.g. a vararg function.
5824   if (HasParameterArea)
5825     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5826   else
5827     NumBytes = LinkageSize;
5828 
5829   // Tail call needs the stack to be aligned.
5830   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5831     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5832 
5833   int SPDiff = 0;
5834 
5835   // Calculate by how many bytes the stack has to be adjusted in case of tail
5836   // call optimization.
5837   if (!IsSibCall)
5838     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
5839 
5840   // To protect arguments on the stack from being clobbered in a tail call,
5841   // force all the loads to happen before doing any other lowering.
5842   if (CFlags.IsTailCall)
5843     Chain = DAG.getStackArgumentTokenFactor(Chain);
5844 
5845   // Adjust the stack pointer for the new arguments...
5846   // These operations are automatically eliminated by the prolog/epilog pass
5847   if (!IsSibCall)
5848     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5849   SDValue CallSeqStart = Chain;
5850 
5851   // Load the return address and frame pointer so it can be move somewhere else
5852   // later.
5853   SDValue LROp, FPOp;
5854   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5855 
5856   // Set up a copy of the stack pointer for use loading and storing any
5857   // arguments that may not fit in the registers available for argument
5858   // passing.
5859   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5860 
5861   // Figure out which arguments are going to go in registers, and which in
5862   // memory.  Also, if this is a vararg function, floating point operations
5863   // must be stored to our stack, and loaded into integer regs as well, if
5864   // any integer regs are available for argument passing.
5865   unsigned ArgOffset = LinkageSize;
5866 
5867   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5868   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5869 
5870   SmallVector<SDValue, 8> MemOpChains;
5871   for (unsigned i = 0; i != NumOps; ++i) {
5872     SDValue Arg = OutVals[i];
5873     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5874     EVT ArgVT = Outs[i].VT;
5875     EVT OrigVT = Outs[i].ArgVT;
5876 
5877     // PtrOff will be used to store the current argument to the stack if a
5878     // register cannot be found for it.
5879     SDValue PtrOff;
5880 
5881     // We re-align the argument offset for each argument, except when using the
5882     // fast calling convention, when we need to make sure we do that only when
5883     // we'll actually use a stack slot.
5884     auto ComputePtrOff = [&]() {
5885       /* Respect alignment of argument on the stack.  */
5886       auto Alignment =
5887           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5888       ArgOffset = alignTo(ArgOffset, Alignment);
5889 
5890       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5891 
5892       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5893     };
5894 
5895     if (!IsFastCall) {
5896       ComputePtrOff();
5897 
5898       /* Compute GPR index associated with argument offset.  */
5899       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5900       GPR_idx = std::min(GPR_idx, NumGPRs);
5901     }
5902 
5903     // Promote integers to 64-bit values.
5904     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5905       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5906       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5907       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5908     }
5909 
5910     // FIXME memcpy is used way more than necessary.  Correctness first.
5911     // Note: "by value" is code for passing a structure by value, not
5912     // basic types.
5913     if (Flags.isByVal()) {
5914       // Note: Size includes alignment padding, so
5915       //   struct x { short a; char b; }
5916       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5917       // These are the proper values we need for right-justifying the
5918       // aggregate in a parameter register.
5919       unsigned Size = Flags.getByValSize();
5920 
5921       // An empty aggregate parameter takes up no storage and no
5922       // registers.
5923       if (Size == 0)
5924         continue;
5925 
5926       if (IsFastCall)
5927         ComputePtrOff();
5928 
5929       // All aggregates smaller than 8 bytes must be passed right-justified.
5930       if (Size==1 || Size==2 || Size==4) {
5931         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5932         if (GPR_idx != NumGPRs) {
5933           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5934                                         MachinePointerInfo(), VT);
5935           MemOpChains.push_back(Load.getValue(1));
5936           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5937 
5938           ArgOffset += PtrByteSize;
5939           continue;
5940         }
5941       }
5942 
5943       if (GPR_idx == NumGPRs && Size < 8) {
5944         SDValue AddPtr = PtrOff;
5945         if (!isLittleEndian) {
5946           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5947                                           PtrOff.getValueType());
5948           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5949         }
5950         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5951                                                           CallSeqStart,
5952                                                           Flags, DAG, dl);
5953         ArgOffset += PtrByteSize;
5954         continue;
5955       }
5956       // Copy entire object into memory.  There are cases where gcc-generated
5957       // code assumes it is there, even if it could be put entirely into
5958       // registers.  (This is not what the doc says.)
5959 
5960       // FIXME: The above statement is likely due to a misunderstanding of the
5961       // documents.  All arguments must be copied into the parameter area BY
5962       // THE CALLEE in the event that the callee takes the address of any
5963       // formal argument.  That has not yet been implemented.  However, it is
5964       // reasonable to use the stack area as a staging area for the register
5965       // load.
5966 
5967       // Skip this for small aggregates, as we will use the same slot for a
5968       // right-justified copy, below.
5969       if (Size >= 8)
5970         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5971                                                           CallSeqStart,
5972                                                           Flags, DAG, dl);
5973 
5974       // When a register is available, pass a small aggregate right-justified.
5975       if (Size < 8 && GPR_idx != NumGPRs) {
5976         // The easiest way to get this right-justified in a register
5977         // is to copy the structure into the rightmost portion of a
5978         // local variable slot, then load the whole slot into the
5979         // register.
5980         // FIXME: The memcpy seems to produce pretty awful code for
5981         // small aggregates, particularly for packed ones.
5982         // FIXME: It would be preferable to use the slot in the
5983         // parameter save area instead of a new local variable.
5984         SDValue AddPtr = PtrOff;
5985         if (!isLittleEndian) {
5986           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5987           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5988         }
5989         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5990                                                           CallSeqStart,
5991                                                           Flags, DAG, dl);
5992 
5993         // Load the slot into the register.
5994         SDValue Load =
5995             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
5996         MemOpChains.push_back(Load.getValue(1));
5997         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5998 
5999         // Done with this argument.
6000         ArgOffset += PtrByteSize;
6001         continue;
6002       }
6003 
6004       // For aggregates larger than PtrByteSize, copy the pieces of the
6005       // object that fit into registers from the parameter save area.
6006       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6007         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6008         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6009         if (GPR_idx != NumGPRs) {
6010           SDValue Load =
6011               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6012           MemOpChains.push_back(Load.getValue(1));
6013           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6014           ArgOffset += PtrByteSize;
6015         } else {
6016           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6017           break;
6018         }
6019       }
6020       continue;
6021     }
6022 
6023     switch (Arg.getSimpleValueType().SimpleTy) {
6024     default: llvm_unreachable("Unexpected ValueType for argument!");
6025     case MVT::i1:
6026     case MVT::i32:
6027     case MVT::i64:
6028       if (Flags.isNest()) {
6029         // The 'nest' parameter, if any, is passed in R11.
6030         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6031         break;
6032       }
6033 
6034       // These can be scalar arguments or elements of an integer array type
6035       // passed directly.  Clang may use those instead of "byval" aggregate
6036       // types to avoid forcing arguments to memory unnecessarily.
6037       if (GPR_idx != NumGPRs) {
6038         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6039       } else {
6040         if (IsFastCall)
6041           ComputePtrOff();
6042 
6043         assert(HasParameterArea &&
6044                "Parameter area must exist to pass an argument in memory.");
6045         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6046                          true, CFlags.IsTailCall, false, MemOpChains,
6047                          TailCallArguments, dl);
6048         if (IsFastCall)
6049           ArgOffset += PtrByteSize;
6050       }
6051       if (!IsFastCall)
6052         ArgOffset += PtrByteSize;
6053       break;
6054     case MVT::f32:
6055     case MVT::f64: {
6056       // These can be scalar arguments or elements of a float array type
6057       // passed directly.  The latter are used to implement ELFv2 homogenous
6058       // float aggregates.
6059 
6060       // Named arguments go into FPRs first, and once they overflow, the
6061       // remaining arguments go into GPRs and then the parameter save area.
6062       // Unnamed arguments for vararg functions always go to GPRs and
6063       // then the parameter save area.  For now, put all arguments to vararg
6064       // routines always in both locations (FPR *and* GPR or stack slot).
6065       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6066       bool NeededLoad = false;
6067 
6068       // First load the argument into the next available FPR.
6069       if (FPR_idx != NumFPRs)
6070         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6071 
6072       // Next, load the argument into GPR or stack slot if needed.
6073       if (!NeedGPROrStack)
6074         ;
6075       else if (GPR_idx != NumGPRs && !IsFastCall) {
6076         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6077         // once we support fp <-> gpr moves.
6078 
6079         // In the non-vararg case, this can only ever happen in the
6080         // presence of f32 array types, since otherwise we never run
6081         // out of FPRs before running out of GPRs.
6082         SDValue ArgVal;
6083 
6084         // Double values are always passed in a single GPR.
6085         if (Arg.getValueType() != MVT::f32) {
6086           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6087 
6088         // Non-array float values are extended and passed in a GPR.
6089         } else if (!Flags.isInConsecutiveRegs()) {
6090           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6091           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6092 
6093         // If we have an array of floats, we collect every odd element
6094         // together with its predecessor into one GPR.
6095         } else if (ArgOffset % PtrByteSize != 0) {
6096           SDValue Lo, Hi;
6097           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6098           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6099           if (!isLittleEndian)
6100             std::swap(Lo, Hi);
6101           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6102 
6103         // The final element, if even, goes into the first half of a GPR.
6104         } else if (Flags.isInConsecutiveRegsLast()) {
6105           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6106           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6107           if (!isLittleEndian)
6108             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6109                                  DAG.getConstant(32, dl, MVT::i32));
6110 
6111         // Non-final even elements are skipped; they will be handled
6112         // together the with subsequent argument on the next go-around.
6113         } else
6114           ArgVal = SDValue();
6115 
6116         if (ArgVal.getNode())
6117           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6118       } else {
6119         if (IsFastCall)
6120           ComputePtrOff();
6121 
6122         // Single-precision floating-point values are mapped to the
6123         // second (rightmost) word of the stack doubleword.
6124         if (Arg.getValueType() == MVT::f32 &&
6125             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6126           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6127           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6128         }
6129 
6130         assert(HasParameterArea &&
6131                "Parameter area must exist to pass an argument in memory.");
6132         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6133                          true, CFlags.IsTailCall, false, MemOpChains,
6134                          TailCallArguments, dl);
6135 
6136         NeededLoad = true;
6137       }
6138       // When passing an array of floats, the array occupies consecutive
6139       // space in the argument area; only round up to the next doubleword
6140       // at the end of the array.  Otherwise, each float takes 8 bytes.
6141       if (!IsFastCall || NeededLoad) {
6142         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6143                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6144         if (Flags.isInConsecutiveRegsLast())
6145           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6146       }
6147       break;
6148     }
6149     case MVT::v4f32:
6150     case MVT::v4i32:
6151     case MVT::v8i16:
6152     case MVT::v16i8:
6153     case MVT::v2f64:
6154     case MVT::v2i64:
6155     case MVT::v1i128:
6156     case MVT::f128:
6157       // These can be scalar arguments or elements of a vector array type
6158       // passed directly.  The latter are used to implement ELFv2 homogenous
6159       // vector aggregates.
6160 
6161       // For a varargs call, named arguments go into VRs or on the stack as
6162       // usual; unnamed arguments always go to the stack or the corresponding
6163       // GPRs when within range.  For now, we always put the value in both
6164       // locations (or even all three).
6165       if (CFlags.IsVarArg) {
6166         assert(HasParameterArea &&
6167                "Parameter area must exist if we have a varargs call.");
6168         // We could elide this store in the case where the object fits
6169         // entirely in R registers.  Maybe later.
6170         SDValue Store =
6171             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6172         MemOpChains.push_back(Store);
6173         if (VR_idx != NumVRs) {
6174           SDValue Load =
6175               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6176           MemOpChains.push_back(Load.getValue(1));
6177           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6178         }
6179         ArgOffset += 16;
6180         for (unsigned i=0; i<16; i+=PtrByteSize) {
6181           if (GPR_idx == NumGPRs)
6182             break;
6183           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6184                                    DAG.getConstant(i, dl, PtrVT));
6185           SDValue Load =
6186               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6187           MemOpChains.push_back(Load.getValue(1));
6188           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6189         }
6190         break;
6191       }
6192 
6193       // Non-varargs Altivec params go into VRs or on the stack.
6194       if (VR_idx != NumVRs) {
6195         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6196       } else {
6197         if (IsFastCall)
6198           ComputePtrOff();
6199 
6200         assert(HasParameterArea &&
6201                "Parameter area must exist to pass an argument in memory.");
6202         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6203                          true, CFlags.IsTailCall, true, MemOpChains,
6204                          TailCallArguments, dl);
6205         if (IsFastCall)
6206           ArgOffset += 16;
6207       }
6208 
6209       if (!IsFastCall)
6210         ArgOffset += 16;
6211       break;
6212     }
6213   }
6214 
6215   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6216          "mismatch in size of parameter area");
6217   (void)NumBytesActuallyUsed;
6218 
6219   if (!MemOpChains.empty())
6220     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6221 
6222   // Check if this is an indirect call (MTCTR/BCTRL).
6223   // See prepareDescriptorIndirectCall and buildCallOperands for more
6224   // information about calls through function pointers in the 64-bit SVR4 ABI.
6225   if (CFlags.IsIndirect) {
6226     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6227     // caller in the TOC save area.
6228     if (isTOCSaveRestoreRequired(Subtarget)) {
6229       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6230       // Load r2 into a virtual register and store it to the TOC save area.
6231       setUsesTOCBasePtr(DAG);
6232       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6233       // TOC save area offset.
6234       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6235       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6236       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6237       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6238                            MachinePointerInfo::getStack(
6239                                DAG.getMachineFunction(), TOCSaveOffset));
6240     }
6241     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6242     // This does not mean the MTCTR instruction must use R12; it's easier
6243     // to model this as an extra parameter, so do that.
6244     if (isELFv2ABI && !CFlags.IsPatchPoint)
6245       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6246   }
6247 
6248   // Build a sequence of copy-to-reg nodes chained together with token chain
6249   // and flag operands which copy the outgoing args into the appropriate regs.
6250   SDValue InFlag;
6251   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6252     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6253                              RegsToPass[i].second, InFlag);
6254     InFlag = Chain.getValue(1);
6255   }
6256 
6257   if (CFlags.IsTailCall && !IsSibCall)
6258     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6259                     TailCallArguments);
6260 
6261   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6262                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6263 }
6264 
6265 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6266                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6267                    CCState &State) {
6268 
6269   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6270       State.getMachineFunction().getSubtarget());
6271   const bool IsPPC64 = Subtarget.isPPC64();
6272   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6273   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6274 
6275   if (ValVT.isVector() && !State.getMachineFunction()
6276                                .getTarget()
6277                                .Options.EnableAIXExtendedAltivecABI)
6278     report_fatal_error("the default Altivec AIX ABI is not yet supported");
6279 
6280   if (ValVT == MVT::f128)
6281     report_fatal_error("f128 is unimplemented on AIX.");
6282 
6283   if (ArgFlags.isNest())
6284     report_fatal_error("Nest arguments are unimplemented.");
6285 
6286   static const MCPhysReg GPR_32[] = {// 32-bit registers.
6287                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6288                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6289   static const MCPhysReg GPR_64[] = {// 64-bit registers.
6290                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6291                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6292 
6293   static const MCPhysReg VR[] = {// Vector registers.
6294                                  PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
6295                                  PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
6296                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6297 
6298   if (ArgFlags.isByVal()) {
6299     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6300       report_fatal_error("Pass-by-value arguments with alignment greater than "
6301                          "register width are not supported.");
6302 
6303     const unsigned ByValSize = ArgFlags.getByValSize();
6304 
6305     // An empty aggregate parameter takes up no storage and no registers,
6306     // but needs a MemLoc for a stack slot for the formal arguments side.
6307     if (ByValSize == 0) {
6308       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6309                                        State.getNextStackOffset(), RegVT,
6310                                        LocInfo));
6311       return false;
6312     }
6313 
6314     const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6315     unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6316     for (const unsigned E = Offset + StackSize; Offset < E;
6317          Offset += PtrAlign.value()) {
6318       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6319         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6320       else {
6321         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6322                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6323                                          LocInfo));
6324         break;
6325       }
6326     }
6327     return false;
6328   }
6329 
6330   // Arguments always reserve parameter save area.
6331   switch (ValVT.SimpleTy) {
6332   default:
6333     report_fatal_error("Unhandled value type for argument.");
6334   case MVT::i64:
6335     // i64 arguments should have been split to i32 for PPC32.
6336     assert(IsPPC64 && "PPC32 should have split i64 values.");
6337     LLVM_FALLTHROUGH;
6338   case MVT::i1:
6339   case MVT::i32: {
6340     const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6341     // AIX integer arguments are always passed in register width.
6342     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6343       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6344                                   : CCValAssign::LocInfo::ZExt;
6345     if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6346       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6347     else
6348       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6349 
6350     return false;
6351   }
6352   case MVT::f32:
6353   case MVT::f64: {
6354     // Parameter save area (PSA) is reserved even if the float passes in fpr.
6355     const unsigned StoreSize = LocVT.getStoreSize();
6356     // Floats are always 4-byte aligned in the PSA on AIX.
6357     // This includes f64 in 64-bit mode for ABI compatibility.
6358     const unsigned Offset =
6359         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6360     unsigned FReg = State.AllocateReg(FPR);
6361     if (FReg)
6362       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6363 
6364     // Reserve and initialize GPRs or initialize the PSA as required.
6365     for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6366       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6367         assert(FReg && "An FPR should be available when a GPR is reserved.");
6368         if (State.isVarArg()) {
6369           // Successfully reserved GPRs are only initialized for vararg calls.
6370           // Custom handling is required for:
6371           //   f64 in PPC32 needs to be split into 2 GPRs.
6372           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6373           State.addLoc(
6374               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6375         }
6376       } else {
6377         // If there are insufficient GPRs, the PSA needs to be initialized.
6378         // Initialization occurs even if an FPR was initialized for
6379         // compatibility with the AIX XL compiler. The full memory for the
6380         // argument will be initialized even if a prior word is saved in GPR.
6381         // A custom memLoc is used when the argument also passes in FPR so
6382         // that the callee handling can skip over it easily.
6383         State.addLoc(
6384             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6385                                              LocInfo)
6386                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6387         break;
6388       }
6389     }
6390 
6391     return false;
6392   }
6393   case MVT::v4f32:
6394   case MVT::v4i32:
6395   case MVT::v8i16:
6396   case MVT::v16i8:
6397   case MVT::v2i64:
6398   case MVT::v2f64:
6399   case MVT::v1i128: {
6400     if (State.isVarArg())
6401       report_fatal_error(
6402           "variadic arguments for vector types are unimplemented for AIX");
6403 
6404     if (unsigned VReg = State.AllocateReg(VR)) {
6405       State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6406       return false;
6407     }
6408 
6409     const unsigned VecSize = 16;
6410     const unsigned Offset = State.AllocateStack(VecSize, Align(VecSize));
6411     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6412     return false;
6413   }
6414   }
6415   return true;
6416 }
6417 
6418 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6419                                                     bool IsPPC64) {
6420   assert((IsPPC64 || SVT != MVT::i64) &&
6421          "i64 should have been split for 32-bit codegen.");
6422 
6423   switch (SVT) {
6424   default:
6425     report_fatal_error("Unexpected value type for formal argument");
6426   case MVT::i1:
6427   case MVT::i32:
6428   case MVT::i64:
6429     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6430   case MVT::f32:
6431     return &PPC::F4RCRegClass;
6432   case MVT::f64:
6433     return &PPC::F8RCRegClass;
6434   case MVT::v4f32:
6435   case MVT::v4i32:
6436   case MVT::v8i16:
6437   case MVT::v16i8:
6438   case MVT::v2i64:
6439   case MVT::v2f64:
6440   case MVT::v1i128:
6441     return &PPC::VRRCRegClass;
6442   }
6443 }
6444 
6445 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6446                                         SelectionDAG &DAG, SDValue ArgValue,
6447                                         MVT LocVT, const SDLoc &dl) {
6448   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6449   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6450 
6451   if (Flags.isSExt())
6452     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6453                            DAG.getValueType(ValVT));
6454   else if (Flags.isZExt())
6455     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6456                            DAG.getValueType(ValVT));
6457 
6458   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6459 }
6460 
6461 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6462   const unsigned LASize = FL->getLinkageSize();
6463 
6464   if (PPC::GPRCRegClass.contains(Reg)) {
6465     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6466            "Reg must be a valid argument register!");
6467     return LASize + 4 * (Reg - PPC::R3);
6468   }
6469 
6470   if (PPC::G8RCRegClass.contains(Reg)) {
6471     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6472            "Reg must be a valid argument register!");
6473     return LASize + 8 * (Reg - PPC::X3);
6474   }
6475 
6476   llvm_unreachable("Only general purpose registers expected.");
6477 }
6478 
6479 //   AIX ABI Stack Frame Layout:
6480 //
6481 //   Low Memory +--------------------------------------------+
6482 //   SP   +---> | Back chain                                 | ---+
6483 //        |     +--------------------------------------------+    |
6484 //        |     | Saved Condition Register                   |    |
6485 //        |     +--------------------------------------------+    |
6486 //        |     | Saved Linkage Register                     |    |
6487 //        |     +--------------------------------------------+    | Linkage Area
6488 //        |     | Reserved for compilers                     |    |
6489 //        |     +--------------------------------------------+    |
6490 //        |     | Reserved for binders                       |    |
6491 //        |     +--------------------------------------------+    |
6492 //        |     | Saved TOC pointer                          | ---+
6493 //        |     +--------------------------------------------+
6494 //        |     | Parameter save area                        |
6495 //        |     +--------------------------------------------+
6496 //        |     | Alloca space                               |
6497 //        |     +--------------------------------------------+
6498 //        |     | Local variable space                       |
6499 //        |     +--------------------------------------------+
6500 //        |     | Float/int conversion temporary             |
6501 //        |     +--------------------------------------------+
6502 //        |     | Save area for AltiVec registers            |
6503 //        |     +--------------------------------------------+
6504 //        |     | AltiVec alignment padding                  |
6505 //        |     +--------------------------------------------+
6506 //        |     | Save area for VRSAVE register              |
6507 //        |     +--------------------------------------------+
6508 //        |     | Save area for General Purpose registers    |
6509 //        |     +--------------------------------------------+
6510 //        |     | Save area for Floating Point registers     |
6511 //        |     +--------------------------------------------+
6512 //        +---- | Back chain                                 |
6513 // High Memory  +--------------------------------------------+
6514 //
6515 //  Specifications:
6516 //  AIX 7.2 Assembler Language Reference
6517 //  Subroutine linkage convention
6518 
6519 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6520     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6521     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6522     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6523 
6524   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6525           CallConv == CallingConv::Fast) &&
6526          "Unexpected calling convention!");
6527 
6528   if (getTargetMachine().Options.GuaranteedTailCallOpt)
6529     report_fatal_error("Tail call support is unimplemented on AIX.");
6530 
6531   if (useSoftFloat())
6532     report_fatal_error("Soft float support is unimplemented on AIX.");
6533 
6534   const PPCSubtarget &Subtarget =
6535       static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6536 
6537   const bool IsPPC64 = Subtarget.isPPC64();
6538   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6539 
6540   // Assign locations to all of the incoming arguments.
6541   SmallVector<CCValAssign, 16> ArgLocs;
6542   MachineFunction &MF = DAG.getMachineFunction();
6543   MachineFrameInfo &MFI = MF.getFrameInfo();
6544   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6545   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6546 
6547   const EVT PtrVT = getPointerTy(MF.getDataLayout());
6548   // Reserve space for the linkage area on the stack.
6549   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6550   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6551   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6552 
6553   SmallVector<SDValue, 8> MemOps;
6554 
6555   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6556     CCValAssign &VA = ArgLocs[I++];
6557     MVT LocVT = VA.getLocVT();
6558     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6559     // For compatibility with the AIX XL compiler, the float args in the
6560     // parameter save area are initialized even if the argument is available
6561     // in register.  The caller is required to initialize both the register
6562     // and memory, however, the callee can choose to expect it in either.
6563     // The memloc is dismissed here because the argument is retrieved from
6564     // the register.
6565     if (VA.isMemLoc() && VA.needsCustom())
6566       continue;
6567 
6568     if (VA.isRegLoc()) {
6569       if (VA.getValVT().isScalarInteger())
6570         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6571       else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
6572         FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
6573                                           ? PPCFunctionInfo::ShortFloatPoint
6574                                           : PPCFunctionInfo::LongFloatPoint);
6575     }
6576 
6577     if (Flags.isByVal() && VA.isMemLoc()) {
6578       const unsigned Size =
6579           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
6580                   PtrByteSize);
6581       const int FI = MF.getFrameInfo().CreateFixedObject(
6582           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
6583           /* IsAliased */ true);
6584       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6585       InVals.push_back(FIN);
6586 
6587       continue;
6588     }
6589 
6590     if (Flags.isByVal()) {
6591       assert(VA.isRegLoc() && "MemLocs should already be handled.");
6592 
6593       const MCPhysReg ArgReg = VA.getLocReg();
6594       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
6595 
6596       if (Flags.getNonZeroByValAlign() > PtrByteSize)
6597         report_fatal_error("Over aligned byvals not supported yet.");
6598 
6599       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
6600       const int FI = MF.getFrameInfo().CreateFixedObject(
6601           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
6602           /* IsAliased */ true);
6603       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6604       InVals.push_back(FIN);
6605 
6606       // Add live ins for all the RegLocs for the same ByVal.
6607       const TargetRegisterClass *RegClass =
6608           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6609 
6610       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
6611                                                unsigned Offset) {
6612         const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
6613         // Since the callers side has left justified the aggregate in the
6614         // register, we can simply store the entire register into the stack
6615         // slot.
6616         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6617         // The store to the fixedstack object is needed becuase accessing a
6618         // field of the ByVal will use a gep and load. Ideally we will optimize
6619         // to extracting the value from the register directly, and elide the
6620         // stores when the arguments address is not taken, but that will need to
6621         // be future work.
6622         SDValue Store = DAG.getStore(
6623             CopyFrom.getValue(1), dl, CopyFrom,
6624             DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
6625             MachinePointerInfo::getFixedStack(MF, FI, Offset));
6626 
6627         MemOps.push_back(Store);
6628       };
6629 
6630       unsigned Offset = 0;
6631       HandleRegLoc(VA.getLocReg(), Offset);
6632       Offset += PtrByteSize;
6633       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
6634            Offset += PtrByteSize) {
6635         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6636                "RegLocs should be for ByVal argument.");
6637 
6638         const CCValAssign RL = ArgLocs[I++];
6639         HandleRegLoc(RL.getLocReg(), Offset);
6640         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6641       }
6642 
6643       if (Offset != StackSize) {
6644         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6645                "Expected MemLoc for remaining bytes.");
6646         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
6647         // Consume the MemLoc.The InVal has already been emitted, so nothing
6648         // more needs to be done.
6649         ++I;
6650       }
6651 
6652       continue;
6653     }
6654 
6655     EVT ValVT = VA.getValVT();
6656     if (VA.isRegLoc() && !VA.needsCustom()) {
6657       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
6658       unsigned VReg =
6659           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
6660       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6661       if (ValVT.isScalarInteger() &&
6662           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
6663         ArgValue =
6664             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
6665       }
6666       InVals.push_back(ArgValue);
6667       continue;
6668     }
6669     if (VA.isMemLoc()) {
6670       const unsigned LocSize = LocVT.getStoreSize();
6671       const unsigned ValSize = ValVT.getStoreSize();
6672       assert((ValSize <= LocSize) &&
6673              "Object size is larger than size of MemLoc");
6674       int CurArgOffset = VA.getLocMemOffset();
6675       // Objects are right-justified because AIX is big-endian.
6676       if (LocSize > ValSize)
6677         CurArgOffset += LocSize - ValSize;
6678       // Potential tail calls could cause overwriting of argument stack slots.
6679       const bool IsImmutable =
6680           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6681             (CallConv == CallingConv::Fast));
6682       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6683       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6684       SDValue ArgValue =
6685           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6686       InVals.push_back(ArgValue);
6687       continue;
6688     }
6689   }
6690 
6691   // On AIX a minimum of 8 words is saved to the parameter save area.
6692   const unsigned MinParameterSaveArea = 8 * PtrByteSize;
6693   // Area that is at least reserved in the caller of this function.
6694   unsigned CallerReservedArea =
6695       std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
6696 
6697   // Set the size that is at least reserved in caller of this function. Tail
6698   // call optimized function's reserved stack space needs to be aligned so
6699   // that taking the difference between two stack areas will result in an
6700   // aligned stack.
6701   CallerReservedArea =
6702       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
6703   FuncInfo->setMinReservedArea(CallerReservedArea);
6704 
6705   if (isVarArg) {
6706     FuncInfo->setVarArgsFrameIndex(
6707         MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
6708     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
6709 
6710     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6711                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6712 
6713     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6714                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6715     const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
6716 
6717     // The fixed integer arguments of a variadic function are stored to the
6718     // VarArgsFrameIndex on the stack so that they may be loaded by
6719     // dereferencing the result of va_next.
6720     for (unsigned GPRIndex =
6721              (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
6722          GPRIndex < NumGPArgRegs; ++GPRIndex) {
6723 
6724       const unsigned VReg =
6725           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
6726                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
6727 
6728       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
6729       SDValue Store =
6730           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
6731       MemOps.push_back(Store);
6732       // Increment the address for the next argument to store.
6733       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
6734       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
6735     }
6736   }
6737 
6738   if (!MemOps.empty())
6739     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
6740 
6741   return Chain;
6742 }
6743 
6744 SDValue PPCTargetLowering::LowerCall_AIX(
6745     SDValue Chain, SDValue Callee, CallFlags CFlags,
6746     const SmallVectorImpl<ISD::OutputArg> &Outs,
6747     const SmallVectorImpl<SDValue> &OutVals,
6748     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6749     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6750     const CallBase *CB) const {
6751   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
6752   // AIX ABI stack frame layout.
6753 
6754   assert((CFlags.CallConv == CallingConv::C ||
6755           CFlags.CallConv == CallingConv::Cold ||
6756           CFlags.CallConv == CallingConv::Fast) &&
6757          "Unexpected calling convention!");
6758 
6759   if (CFlags.IsPatchPoint)
6760     report_fatal_error("This call type is unimplemented on AIX.");
6761 
6762   const PPCSubtarget& Subtarget =
6763       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
6764 
6765   MachineFunction &MF = DAG.getMachineFunction();
6766   SmallVector<CCValAssign, 16> ArgLocs;
6767   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
6768                  *DAG.getContext());
6769 
6770   // Reserve space for the linkage save area (LSA) on the stack.
6771   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
6772   //   [SP][CR][LR][2 x reserved][TOC].
6773   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
6774   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6775   const bool IsPPC64 = Subtarget.isPPC64();
6776   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
6777   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6778   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6779   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
6780 
6781   // The prolog code of the callee may store up to 8 GPR argument registers to
6782   // the stack, allowing va_start to index over them in memory if the callee
6783   // is variadic.
6784   // Because we cannot tell if this is needed on the caller side, we have to
6785   // conservatively assume that it is needed.  As such, make sure we have at
6786   // least enough stack space for the caller to store the 8 GPRs.
6787   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
6788   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
6789                                      CCInfo.getNextStackOffset());
6790 
6791   // Adjust the stack pointer for the new arguments...
6792   // These operations are automatically eliminated by the prolog/epilog pass.
6793   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6794   SDValue CallSeqStart = Chain;
6795 
6796   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6797   SmallVector<SDValue, 8> MemOpChains;
6798 
6799   // Set up a copy of the stack pointer for loading and storing any
6800   // arguments that may not fit in the registers available for argument
6801   // passing.
6802   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
6803                                    : DAG.getRegister(PPC::R1, MVT::i32);
6804 
6805   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
6806     const unsigned ValNo = ArgLocs[I].getValNo();
6807     SDValue Arg = OutVals[ValNo];
6808     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
6809 
6810     if (Flags.isByVal()) {
6811       const unsigned ByValSize = Flags.getByValSize();
6812 
6813       // Nothing to do for zero-sized ByVals on the caller side.
6814       if (!ByValSize) {
6815         ++I;
6816         continue;
6817       }
6818 
6819       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
6820         return DAG.getExtLoad(
6821             ISD::ZEXTLOAD, dl, PtrVT, Chain,
6822             (LoadOffset != 0)
6823                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6824                 : Arg,
6825             MachinePointerInfo(), VT);
6826       };
6827 
6828       unsigned LoadOffset = 0;
6829 
6830       // Initialize registers, which are fully occupied by the by-val argument.
6831       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
6832         SDValue Load = GetLoad(PtrVT, LoadOffset);
6833         MemOpChains.push_back(Load.getValue(1));
6834         LoadOffset += PtrByteSize;
6835         const CCValAssign &ByValVA = ArgLocs[I++];
6836         assert(ByValVA.getValNo() == ValNo &&
6837                "Unexpected location for pass-by-value argument.");
6838         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
6839       }
6840 
6841       if (LoadOffset == ByValSize)
6842         continue;
6843 
6844       // There must be one more loc to handle the remainder.
6845       assert(ArgLocs[I].getValNo() == ValNo &&
6846              "Expected additional location for by-value argument.");
6847 
6848       if (ArgLocs[I].isMemLoc()) {
6849         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
6850         const CCValAssign &ByValVA = ArgLocs[I++];
6851         ISD::ArgFlagsTy MemcpyFlags = Flags;
6852         // Only memcpy the bytes that don't pass in register.
6853         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
6854         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
6855             (LoadOffset != 0)
6856                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6857                 : Arg,
6858             DAG.getObjectPtrOffset(dl, StackPtr,
6859                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
6860             CallSeqStart, MemcpyFlags, DAG, dl);
6861         continue;
6862       }
6863 
6864       // Initialize the final register residue.
6865       // Any residue that occupies the final by-val arg register must be
6866       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
6867       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
6868       // 2 and 1 byte loads.
6869       const unsigned ResidueBytes = ByValSize % PtrByteSize;
6870       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
6871              "Unexpected register residue for by-value argument.");
6872       SDValue ResidueVal;
6873       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
6874         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
6875         const MVT VT =
6876             N == 1 ? MVT::i8
6877                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
6878         SDValue Load = GetLoad(VT, LoadOffset);
6879         MemOpChains.push_back(Load.getValue(1));
6880         LoadOffset += N;
6881         Bytes += N;
6882 
6883         // By-val arguments are passed left-justfied in register.
6884         // Every load here needs to be shifted, otherwise a full register load
6885         // should have been used.
6886         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
6887                "Unexpected load emitted during handling of pass-by-value "
6888                "argument.");
6889         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
6890         EVT ShiftAmountTy =
6891             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
6892         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
6893         SDValue ShiftedLoad =
6894             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
6895         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
6896                                               ShiftedLoad)
6897                                 : ShiftedLoad;
6898       }
6899 
6900       const CCValAssign &ByValVA = ArgLocs[I++];
6901       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
6902       continue;
6903     }
6904 
6905     CCValAssign &VA = ArgLocs[I++];
6906     const MVT LocVT = VA.getLocVT();
6907     const MVT ValVT = VA.getValVT();
6908 
6909     switch (VA.getLocInfo()) {
6910     default:
6911       report_fatal_error("Unexpected argument extension type.");
6912     case CCValAssign::Full:
6913       break;
6914     case CCValAssign::ZExt:
6915       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6916       break;
6917     case CCValAssign::SExt:
6918       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6919       break;
6920     }
6921 
6922     if (VA.isRegLoc() && !VA.needsCustom()) {
6923       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6924       continue;
6925     }
6926 
6927     if (VA.isMemLoc()) {
6928       SDValue PtrOff =
6929           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
6930       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6931       MemOpChains.push_back(
6932           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6933 
6934       continue;
6935     }
6936 
6937     // Custom handling is used for GPR initializations for vararg float
6938     // arguments.
6939     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
6940            ValVT.isFloatingPoint() && LocVT.isInteger() &&
6941            "Unexpected register handling for calling convention.");
6942 
6943     SDValue ArgAsInt =
6944         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
6945 
6946     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
6947       // f32 in 32-bit GPR
6948       // f64 in 64-bit GPR
6949       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
6950     else if (Arg.getValueType().getFixedSizeInBits() <
6951              LocVT.getFixedSizeInBits())
6952       // f32 in 64-bit GPR.
6953       RegsToPass.push_back(std::make_pair(
6954           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
6955     else {
6956       // f64 in two 32-bit GPRs
6957       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
6958       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
6959              "Unexpected custom register for argument!");
6960       CCValAssign &GPR1 = VA;
6961       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
6962                                      DAG.getConstant(32, dl, MVT::i8));
6963       RegsToPass.push_back(std::make_pair(
6964           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
6965 
6966       if (I != E) {
6967         // If only 1 GPR was available, there will only be one custom GPR and
6968         // the argument will also pass in memory.
6969         CCValAssign &PeekArg = ArgLocs[I];
6970         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
6971           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
6972           CCValAssign &GPR2 = ArgLocs[I++];
6973           RegsToPass.push_back(std::make_pair(
6974               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
6975         }
6976       }
6977     }
6978   }
6979 
6980   if (!MemOpChains.empty())
6981     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6982 
6983   // For indirect calls, we need to save the TOC base to the stack for
6984   // restoration after the call.
6985   if (CFlags.IsIndirect) {
6986     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6987     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
6988     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
6989     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
6990     const unsigned TOCSaveOffset =
6991         Subtarget.getFrameLowering()->getTOCSaveOffset();
6992 
6993     setUsesTOCBasePtr(DAG);
6994     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
6995     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6996     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
6997     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6998     Chain = DAG.getStore(
6999         Val.getValue(1), dl, Val, AddPtr,
7000         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7001   }
7002 
7003   // Build a sequence of copy-to-reg nodes chained together with token chain
7004   // and flag operands which copy the outgoing args into the appropriate regs.
7005   SDValue InFlag;
7006   for (auto Reg : RegsToPass) {
7007     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7008     InFlag = Chain.getValue(1);
7009   }
7010 
7011   const int SPDiff = 0;
7012   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7013                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7014 }
7015 
7016 bool
7017 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7018                                   MachineFunction &MF, bool isVarArg,
7019                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7020                                   LLVMContext &Context) const {
7021   SmallVector<CCValAssign, 16> RVLocs;
7022   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7023   return CCInfo.CheckReturn(
7024       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7025                 ? RetCC_PPC_Cold
7026                 : RetCC_PPC);
7027 }
7028 
7029 SDValue
7030 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7031                                bool isVarArg,
7032                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7033                                const SmallVectorImpl<SDValue> &OutVals,
7034                                const SDLoc &dl, SelectionDAG &DAG) const {
7035   SmallVector<CCValAssign, 16> RVLocs;
7036   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7037                  *DAG.getContext());
7038   CCInfo.AnalyzeReturn(Outs,
7039                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7040                            ? RetCC_PPC_Cold
7041                            : RetCC_PPC);
7042 
7043   SDValue Flag;
7044   SmallVector<SDValue, 4> RetOps(1, Chain);
7045 
7046   // Copy the result values into the output registers.
7047   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7048     CCValAssign &VA = RVLocs[i];
7049     assert(VA.isRegLoc() && "Can only return in registers!");
7050 
7051     SDValue Arg = OutVals[RealResIdx];
7052 
7053     switch (VA.getLocInfo()) {
7054     default: llvm_unreachable("Unknown loc info!");
7055     case CCValAssign::Full: break;
7056     case CCValAssign::AExt:
7057       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7058       break;
7059     case CCValAssign::ZExt:
7060       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7061       break;
7062     case CCValAssign::SExt:
7063       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7064       break;
7065     }
7066     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7067       bool isLittleEndian = Subtarget.isLittleEndian();
7068       // Legalize ret f64 -> ret 2 x i32.
7069       SDValue SVal =
7070           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7071                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7072       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7073       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7074       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7075                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7076       Flag = Chain.getValue(1);
7077       VA = RVLocs[++i]; // skip ahead to next loc
7078       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7079     } else
7080       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7081     Flag = Chain.getValue(1);
7082     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7083   }
7084 
7085   RetOps[0] = Chain;  // Update chain.
7086 
7087   // Add the flag if we have it.
7088   if (Flag.getNode())
7089     RetOps.push_back(Flag);
7090 
7091   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7092 }
7093 
7094 SDValue
7095 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7096                                                 SelectionDAG &DAG) const {
7097   SDLoc dl(Op);
7098 
7099   // Get the correct type for integers.
7100   EVT IntVT = Op.getValueType();
7101 
7102   // Get the inputs.
7103   SDValue Chain = Op.getOperand(0);
7104   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7105   // Build a DYNAREAOFFSET node.
7106   SDValue Ops[2] = {Chain, FPSIdx};
7107   SDVTList VTs = DAG.getVTList(IntVT);
7108   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7109 }
7110 
7111 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7112                                              SelectionDAG &DAG) const {
7113   // When we pop the dynamic allocation we need to restore the SP link.
7114   SDLoc dl(Op);
7115 
7116   // Get the correct type for pointers.
7117   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7118 
7119   // Construct the stack pointer operand.
7120   bool isPPC64 = Subtarget.isPPC64();
7121   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7122   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7123 
7124   // Get the operands for the STACKRESTORE.
7125   SDValue Chain = Op.getOperand(0);
7126   SDValue SaveSP = Op.getOperand(1);
7127 
7128   // Load the old link SP.
7129   SDValue LoadLinkSP =
7130       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7131 
7132   // Restore the stack pointer.
7133   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7134 
7135   // Store the old link SP.
7136   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7137 }
7138 
7139 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7140   MachineFunction &MF = DAG.getMachineFunction();
7141   bool isPPC64 = Subtarget.isPPC64();
7142   EVT PtrVT = getPointerTy(MF.getDataLayout());
7143 
7144   // Get current frame pointer save index.  The users of this index will be
7145   // primarily DYNALLOC instructions.
7146   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7147   int RASI = FI->getReturnAddrSaveIndex();
7148 
7149   // If the frame pointer save index hasn't been defined yet.
7150   if (!RASI) {
7151     // Find out what the fix offset of the frame pointer save area.
7152     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7153     // Allocate the frame index for frame pointer save area.
7154     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7155     // Save the result.
7156     FI->setReturnAddrSaveIndex(RASI);
7157   }
7158   return DAG.getFrameIndex(RASI, PtrVT);
7159 }
7160 
7161 SDValue
7162 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7163   MachineFunction &MF = DAG.getMachineFunction();
7164   bool isPPC64 = Subtarget.isPPC64();
7165   EVT PtrVT = getPointerTy(MF.getDataLayout());
7166 
7167   // Get current frame pointer save index.  The users of this index will be
7168   // primarily DYNALLOC instructions.
7169   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7170   int FPSI = FI->getFramePointerSaveIndex();
7171 
7172   // If the frame pointer save index hasn't been defined yet.
7173   if (!FPSI) {
7174     // Find out what the fix offset of the frame pointer save area.
7175     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7176     // Allocate the frame index for frame pointer save area.
7177     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7178     // Save the result.
7179     FI->setFramePointerSaveIndex(FPSI);
7180   }
7181   return DAG.getFrameIndex(FPSI, PtrVT);
7182 }
7183 
7184 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7185                                                    SelectionDAG &DAG) const {
7186   MachineFunction &MF = DAG.getMachineFunction();
7187   // Get the inputs.
7188   SDValue Chain = Op.getOperand(0);
7189   SDValue Size  = Op.getOperand(1);
7190   SDLoc dl(Op);
7191 
7192   // Get the correct type for pointers.
7193   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7194   // Negate the size.
7195   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7196                                 DAG.getConstant(0, dl, PtrVT), Size);
7197   // Construct a node for the frame pointer save index.
7198   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7199   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7200   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7201   if (hasInlineStackProbe(MF))
7202     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7203   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7204 }
7205 
7206 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7207                                                      SelectionDAG &DAG) const {
7208   MachineFunction &MF = DAG.getMachineFunction();
7209 
7210   bool isPPC64 = Subtarget.isPPC64();
7211   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7212 
7213   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7214   return DAG.getFrameIndex(FI, PtrVT);
7215 }
7216 
7217 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7218                                                SelectionDAG &DAG) const {
7219   SDLoc DL(Op);
7220   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7221                      DAG.getVTList(MVT::i32, MVT::Other),
7222                      Op.getOperand(0), Op.getOperand(1));
7223 }
7224 
7225 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7226                                                 SelectionDAG &DAG) const {
7227   SDLoc DL(Op);
7228   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7229                      Op.getOperand(0), Op.getOperand(1));
7230 }
7231 
7232 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7233   if (Op.getValueType().isVector())
7234     return LowerVectorLoad(Op, DAG);
7235 
7236   assert(Op.getValueType() == MVT::i1 &&
7237          "Custom lowering only for i1 loads");
7238 
7239   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7240 
7241   SDLoc dl(Op);
7242   LoadSDNode *LD = cast<LoadSDNode>(Op);
7243 
7244   SDValue Chain = LD->getChain();
7245   SDValue BasePtr = LD->getBasePtr();
7246   MachineMemOperand *MMO = LD->getMemOperand();
7247 
7248   SDValue NewLD =
7249       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7250                      BasePtr, MVT::i8, MMO);
7251   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7252 
7253   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7254   return DAG.getMergeValues(Ops, dl);
7255 }
7256 
7257 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7258   if (Op.getOperand(1).getValueType().isVector())
7259     return LowerVectorStore(Op, DAG);
7260 
7261   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7262          "Custom lowering only for i1 stores");
7263 
7264   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7265 
7266   SDLoc dl(Op);
7267   StoreSDNode *ST = cast<StoreSDNode>(Op);
7268 
7269   SDValue Chain = ST->getChain();
7270   SDValue BasePtr = ST->getBasePtr();
7271   SDValue Value = ST->getValue();
7272   MachineMemOperand *MMO = ST->getMemOperand();
7273 
7274   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7275                       Value);
7276   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7277 }
7278 
7279 // FIXME: Remove this once the ANDI glue bug is fixed:
7280 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7281   assert(Op.getValueType() == MVT::i1 &&
7282          "Custom lowering only for i1 results");
7283 
7284   SDLoc DL(Op);
7285   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7286 }
7287 
7288 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7289                                                SelectionDAG &DAG) const {
7290 
7291   // Implements a vector truncate that fits in a vector register as a shuffle.
7292   // We want to legalize vector truncates down to where the source fits in
7293   // a vector register (and target is therefore smaller than vector register
7294   // size).  At that point legalization will try to custom lower the sub-legal
7295   // result and get here - where we can contain the truncate as a single target
7296   // operation.
7297 
7298   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7299   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7300   //
7301   // We will implement it for big-endian ordering as this (where x denotes
7302   // undefined):
7303   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7304   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7305   //
7306   // The same operation in little-endian ordering will be:
7307   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7308   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7309 
7310   EVT TrgVT = Op.getValueType();
7311   assert(TrgVT.isVector() && "Vector type expected.");
7312   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7313   EVT EltVT = TrgVT.getVectorElementType();
7314   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7315       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7316       !isPowerOf2_32(EltVT.getSizeInBits()))
7317     return SDValue();
7318 
7319   SDValue N1 = Op.getOperand(0);
7320   EVT SrcVT = N1.getValueType();
7321   unsigned SrcSize = SrcVT.getSizeInBits();
7322   if (SrcSize > 256 ||
7323       !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7324       !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7325     return SDValue();
7326   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7327     return SDValue();
7328 
7329   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7330   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7331 
7332   SDLoc DL(Op);
7333   SDValue Op1, Op2;
7334   if (SrcSize == 256) {
7335     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7336     EVT SplitVT =
7337         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7338     unsigned SplitNumElts = SplitVT.getVectorNumElements();
7339     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7340                       DAG.getConstant(0, DL, VecIdxTy));
7341     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7342                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7343   }
7344   else {
7345     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7346     Op2 = DAG.getUNDEF(WideVT);
7347   }
7348 
7349   // First list the elements we want to keep.
7350   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7351   SmallVector<int, 16> ShuffV;
7352   if (Subtarget.isLittleEndian())
7353     for (unsigned i = 0; i < TrgNumElts; ++i)
7354       ShuffV.push_back(i * SizeMult);
7355   else
7356     for (unsigned i = 1; i <= TrgNumElts; ++i)
7357       ShuffV.push_back(i * SizeMult - 1);
7358 
7359   // Populate the remaining elements with undefs.
7360   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7361     // ShuffV.push_back(i + WideNumElts);
7362     ShuffV.push_back(WideNumElts + 1);
7363 
7364   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7365   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7366   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7367 }
7368 
7369 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7370 /// possible.
7371 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7372   // Not FP, or using SPE? Not a fsel.
7373   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7374       !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7375     return Op;
7376 
7377   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7378 
7379   EVT ResVT = Op.getValueType();
7380   EVT CmpVT = Op.getOperand(0).getValueType();
7381   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7382   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
7383   SDLoc dl(Op);
7384   SDNodeFlags Flags = Op.getNode()->getFlags();
7385 
7386   // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7387   // presence of infinities.
7388   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7389     switch (CC) {
7390     default:
7391       break;
7392     case ISD::SETOGT:
7393     case ISD::SETGT:
7394       return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7395     case ISD::SETOLT:
7396     case ISD::SETLT:
7397       return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7398     }
7399   }
7400 
7401   // We might be able to do better than this under some circumstances, but in
7402   // general, fsel-based lowering of select is a finite-math-only optimization.
7403   // For more information, see section F.3 of the 2.06 ISA specification.
7404   // With ISA 3.0
7405   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7406       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7407     return Op;
7408 
7409   // If the RHS of the comparison is a 0.0, we don't need to do the
7410   // subtraction at all.
7411   SDValue Sel1;
7412   if (isFloatingPointZero(RHS))
7413     switch (CC) {
7414     default: break;       // SETUO etc aren't handled by fsel.
7415     case ISD::SETNE:
7416       std::swap(TV, FV);
7417       LLVM_FALLTHROUGH;
7418     case ISD::SETEQ:
7419       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7420         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7421       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7422       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
7423         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7424       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7425                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7426     case ISD::SETULT:
7427     case ISD::SETLT:
7428       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
7429       LLVM_FALLTHROUGH;
7430     case ISD::SETOGE:
7431     case ISD::SETGE:
7432       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7433         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7434       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7435     case ISD::SETUGT:
7436     case ISD::SETGT:
7437       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
7438       LLVM_FALLTHROUGH;
7439     case ISD::SETOLE:
7440     case ISD::SETLE:
7441       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7442         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7443       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7444                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7445     }
7446 
7447   SDValue Cmp;
7448   switch (CC) {
7449   default: break;       // SETUO etc aren't handled by fsel.
7450   case ISD::SETNE:
7451     std::swap(TV, FV);
7452     LLVM_FALLTHROUGH;
7453   case ISD::SETEQ:
7454     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7455     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7456       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7457     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7458     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
7459       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7460     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7461                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7462   case ISD::SETULT:
7463   case ISD::SETLT:
7464     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7465     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7466       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7467     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7468   case ISD::SETOGE:
7469   case ISD::SETGE:
7470     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7471     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7472       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7473     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7474   case ISD::SETUGT:
7475   case ISD::SETGT:
7476     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7477     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7478       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7479     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7480   case ISD::SETOLE:
7481   case ISD::SETLE:
7482     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7483     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7484       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7485     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7486   }
7487   return Op;
7488 }
7489 
7490 static unsigned getPPCStrictOpcode(unsigned Opc) {
7491   switch (Opc) {
7492   default:
7493     llvm_unreachable("No strict version of this opcode!");
7494   case PPCISD::FCTIDZ:
7495     return PPCISD::STRICT_FCTIDZ;
7496   case PPCISD::FCTIWZ:
7497     return PPCISD::STRICT_FCTIWZ;
7498   case PPCISD::FCTIDUZ:
7499     return PPCISD::STRICT_FCTIDUZ;
7500   case PPCISD::FCTIWUZ:
7501     return PPCISD::STRICT_FCTIWUZ;
7502   case PPCISD::FCFID:
7503     return PPCISD::STRICT_FCFID;
7504   case PPCISD::FCFIDU:
7505     return PPCISD::STRICT_FCFIDU;
7506   case PPCISD::FCFIDS:
7507     return PPCISD::STRICT_FCFIDS;
7508   case PPCISD::FCFIDUS:
7509     return PPCISD::STRICT_FCFIDUS;
7510   }
7511 }
7512 
7513 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
7514                               const PPCSubtarget &Subtarget) {
7515   SDLoc dl(Op);
7516   bool IsStrict = Op->isStrictFPOpcode();
7517   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7518                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7519 
7520   // TODO: Any other flags to propagate?
7521   SDNodeFlags Flags;
7522   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7523 
7524   // For strict nodes, source is the second operand.
7525   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7526   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7527   assert(Src.getValueType().isFloatingPoint());
7528   if (Src.getValueType() == MVT::f32) {
7529     if (IsStrict) {
7530       Src =
7531           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
7532                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
7533       Chain = Src.getValue(1);
7534     } else
7535       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7536   }
7537   SDValue Conv;
7538   unsigned Opc = ISD::DELETED_NODE;
7539   switch (Op.getSimpleValueType().SimpleTy) {
7540   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7541   case MVT::i32:
7542     Opc = IsSigned ? PPCISD::FCTIWZ
7543                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
7544     break;
7545   case MVT::i64:
7546     assert((IsSigned || Subtarget.hasFPCVT()) &&
7547            "i64 FP_TO_UINT is supported only with FPCVT");
7548     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
7549   }
7550   if (IsStrict) {
7551     Opc = getPPCStrictOpcode(Opc);
7552     Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
7553                        {Chain, Src}, Flags);
7554   } else {
7555     Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
7556   }
7557   return Conv;
7558 }
7559 
7560 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7561                                                SelectionDAG &DAG,
7562                                                const SDLoc &dl) const {
7563   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
7564   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7565                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7566   bool IsStrict = Op->isStrictFPOpcode();
7567 
7568   // Convert the FP value to an int value through memory.
7569   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7570                   (IsSigned || Subtarget.hasFPCVT());
7571   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7572   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7573   MachinePointerInfo MPI =
7574       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7575 
7576   // Emit a store to the stack slot.
7577   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
7578   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
7579   if (i32Stack) {
7580     MachineFunction &MF = DAG.getMachineFunction();
7581     Alignment = Align(4);
7582     MachineMemOperand *MMO =
7583         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
7584     SDValue Ops[] = { Chain, Tmp, FIPtr };
7585     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7586               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7587   } else
7588     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
7589 
7590   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
7591   // add in a bias on big endian.
7592   if (Op.getValueType() == MVT::i32 && !i32Stack) {
7593     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7594                         DAG.getConstant(4, dl, FIPtr.getValueType()));
7595     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7596   }
7597 
7598   RLI.Chain = Chain;
7599   RLI.Ptr = FIPtr;
7600   RLI.MPI = MPI;
7601   RLI.Alignment = Alignment;
7602 }
7603 
7604 /// Custom lowers floating point to integer conversions to use
7605 /// the direct move instructions available in ISA 2.07 to avoid the
7606 /// need for load/store combinations.
7607 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7608                                                     SelectionDAG &DAG,
7609                                                     const SDLoc &dl) const {
7610   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
7611   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
7612   if (Op->isStrictFPOpcode())
7613     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
7614   else
7615     return Mov;
7616 }
7617 
7618 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7619                                           const SDLoc &dl) const {
7620   bool IsStrict = Op->isStrictFPOpcode();
7621   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7622                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7623   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7624   EVT SrcVT = Src.getValueType();
7625   EVT DstVT = Op.getValueType();
7626 
7627   // FP to INT conversions are legal for f128.
7628   if (SrcVT == MVT::f128)
7629     return Subtarget.hasP9Vector() ? Op : SDValue();
7630 
7631   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7632   // PPC (the libcall is not available).
7633   if (SrcVT == MVT::ppcf128) {
7634     if (DstVT == MVT::i32) {
7635       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
7636       // set other fast-math flags to FP operations in both strict and
7637       // non-strict cases. (FP_TO_SINT, FSUB)
7638       SDNodeFlags Flags;
7639       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7640 
7641       if (IsSigned) {
7642         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7643                                  DAG.getIntPtrConstant(0, dl));
7644         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7645                                  DAG.getIntPtrConstant(1, dl));
7646 
7647         // Add the two halves of the long double in round-to-zero mode, and use
7648         // a smaller FP_TO_SINT.
7649         if (IsStrict) {
7650           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
7651                                     DAG.getVTList(MVT::f64, MVT::Other),
7652                                     {Op.getOperand(0), Lo, Hi}, Flags);
7653           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7654                              DAG.getVTList(MVT::i32, MVT::Other),
7655                              {Res.getValue(1), Res}, Flags);
7656         } else {
7657           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7658           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7659         }
7660       } else {
7661         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7662         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7663         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
7664         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
7665         if (IsStrict) {
7666           // Sel = Src < 0x80000000
7667           // FltOfs = select Sel, 0.0, 0x80000000
7668           // IntOfs = select Sel, 0, 0x80000000
7669           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
7670           SDValue Chain = Op.getOperand(0);
7671           EVT SetCCVT =
7672               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7673           EVT DstSetCCVT =
7674               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
7675           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
7676                                      Chain, true);
7677           Chain = Sel.getValue(1);
7678 
7679           SDValue FltOfs = DAG.getSelect(
7680               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
7681           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
7682 
7683           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
7684                                     DAG.getVTList(SrcVT, MVT::Other),
7685                                     {Chain, Src, FltOfs}, Flags);
7686           Chain = Val.getValue(1);
7687           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7688                                      DAG.getVTList(DstVT, MVT::Other),
7689                                      {Chain, Val}, Flags);
7690           Chain = SInt.getValue(1);
7691           SDValue IntOfs = DAG.getSelect(
7692               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
7693           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
7694           return DAG.getMergeValues({Result, Chain}, dl);
7695         } else {
7696           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7697           // FIXME: generated code sucks.
7698           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
7699           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7700           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
7701           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
7702           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
7703         }
7704       }
7705     }
7706 
7707     return SDValue();
7708   }
7709 
7710   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7711     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7712 
7713   ReuseLoadInfo RLI;
7714   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7715 
7716   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7717                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7718 }
7719 
7720 // We're trying to insert a regular store, S, and then a load, L. If the
7721 // incoming value, O, is a load, we might just be able to have our load use the
7722 // address used by O. However, we don't know if anything else will store to
7723 // that address before we can load from it. To prevent this situation, we need
7724 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7725 // the same chain operand as O, we create a token factor from the chain results
7726 // of O and L, and we replace all uses of O's chain result with that token
7727 // factor (see spliceIntoChain below for this last part).
7728 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7729                                             ReuseLoadInfo &RLI,
7730                                             SelectionDAG &DAG,
7731                                             ISD::LoadExtType ET) const {
7732   // Conservatively skip reusing for constrained FP nodes.
7733   if (Op->isStrictFPOpcode())
7734     return false;
7735 
7736   SDLoc dl(Op);
7737   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
7738                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
7739   if (ET == ISD::NON_EXTLOAD &&
7740       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
7741       isOperationLegalOrCustom(Op.getOpcode(),
7742                                Op.getOperand(0).getValueType())) {
7743 
7744     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7745     return true;
7746   }
7747 
7748   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7749   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7750       LD->isNonTemporal())
7751     return false;
7752   if (LD->getMemoryVT() != MemVT)
7753     return false;
7754 
7755   // If the result of the load is an illegal type, then we can't build a
7756   // valid chain for reuse since the legalised loads and token factor node that
7757   // ties the legalised loads together uses a different output chain then the
7758   // illegal load.
7759   if (!isTypeLegal(LD->getValueType(0)))
7760     return false;
7761 
7762   RLI.Ptr = LD->getBasePtr();
7763   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7764     assert(LD->getAddressingMode() == ISD::PRE_INC &&
7765            "Non-pre-inc AM on PPC?");
7766     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7767                           LD->getOffset());
7768   }
7769 
7770   RLI.Chain = LD->getChain();
7771   RLI.MPI = LD->getPointerInfo();
7772   RLI.IsDereferenceable = LD->isDereferenceable();
7773   RLI.IsInvariant = LD->isInvariant();
7774   RLI.Alignment = LD->getAlign();
7775   RLI.AAInfo = LD->getAAInfo();
7776   RLI.Ranges = LD->getRanges();
7777 
7778   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7779   return true;
7780 }
7781 
7782 // Given the head of the old chain, ResChain, insert a token factor containing
7783 // it and NewResChain, and make users of ResChain now be users of that token
7784 // factor.
7785 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7786 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7787                                         SDValue NewResChain,
7788                                         SelectionDAG &DAG) const {
7789   if (!ResChain)
7790     return;
7791 
7792   SDLoc dl(NewResChain);
7793 
7794   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7795                            NewResChain, DAG.getUNDEF(MVT::Other));
7796   assert(TF.getNode() != NewResChain.getNode() &&
7797          "A new TF really is required here");
7798 
7799   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7800   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7801 }
7802 
7803 /// Analyze profitability of direct move
7804 /// prefer float load to int load plus direct move
7805 /// when there is no integer use of int load
7806 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7807   SDNode *Origin = Op.getOperand(0).getNode();
7808   if (Origin->getOpcode() != ISD::LOAD)
7809     return true;
7810 
7811   // If there is no LXSIBZX/LXSIHZX, like Power8,
7812   // prefer direct move if the memory size is 1 or 2 bytes.
7813   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7814   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7815     return true;
7816 
7817   for (SDNode::use_iterator UI = Origin->use_begin(),
7818                             UE = Origin->use_end();
7819        UI != UE; ++UI) {
7820 
7821     // Only look at the users of the loaded value.
7822     if (UI.getUse().get().getResNo() != 0)
7823       continue;
7824 
7825     if (UI->getOpcode() != ISD::SINT_TO_FP &&
7826         UI->getOpcode() != ISD::UINT_TO_FP &&
7827         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
7828         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
7829       return true;
7830   }
7831 
7832   return false;
7833 }
7834 
7835 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
7836                               const PPCSubtarget &Subtarget,
7837                               SDValue Chain = SDValue()) {
7838   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7839                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7840   SDLoc dl(Op);
7841 
7842   // TODO: Any other flags to propagate?
7843   SDNodeFlags Flags;
7844   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7845 
7846   // If we have FCFIDS, then use it when converting to single-precision.
7847   // Otherwise, convert to double-precision and then round.
7848   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
7849   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
7850                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
7851   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
7852   if (Op->isStrictFPOpcode()) {
7853     if (!Chain)
7854       Chain = Op.getOperand(0);
7855     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
7856                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
7857   } else
7858     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
7859 }
7860 
7861 /// Custom lowers integer to floating point conversions to use
7862 /// the direct move instructions available in ISA 2.07 to avoid the
7863 /// need for load/store combinations.
7864 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7865                                                     SelectionDAG &DAG,
7866                                                     const SDLoc &dl) const {
7867   assert((Op.getValueType() == MVT::f32 ||
7868           Op.getValueType() == MVT::f64) &&
7869          "Invalid floating point type as target of conversion");
7870   assert(Subtarget.hasFPCVT() &&
7871          "Int to FP conversions with direct moves require FPCVT");
7872   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7873   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7874   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
7875                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7876   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
7877   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
7878   return convertIntToFP(Op, Mov, DAG, Subtarget);
7879 }
7880 
7881 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7882 
7883   EVT VecVT = Vec.getValueType();
7884   assert(VecVT.isVector() && "Expected a vector type.");
7885   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7886 
7887   EVT EltVT = VecVT.getVectorElementType();
7888   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7889   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7890 
7891   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7892   SmallVector<SDValue, 16> Ops(NumConcat);
7893   Ops[0] = Vec;
7894   SDValue UndefVec = DAG.getUNDEF(VecVT);
7895   for (unsigned i = 1; i < NumConcat; ++i)
7896     Ops[i] = UndefVec;
7897 
7898   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7899 }
7900 
7901 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7902                                                 const SDLoc &dl) const {
7903   bool IsStrict = Op->isStrictFPOpcode();
7904   unsigned Opc = Op.getOpcode();
7905   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7906   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
7907           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
7908          "Unexpected conversion type");
7909   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7910          "Supports conversions to v2f64/v4f32 only.");
7911 
7912   // TODO: Any other flags to propagate?
7913   SDNodeFlags Flags;
7914   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7915 
7916   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
7917   bool FourEltRes = Op.getValueType() == MVT::v4f32;
7918 
7919   SDValue Wide = widenVec(DAG, Src, dl);
7920   EVT WideVT = Wide.getValueType();
7921   unsigned WideNumElts = WideVT.getVectorNumElements();
7922   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7923 
7924   SmallVector<int, 16> ShuffV;
7925   for (unsigned i = 0; i < WideNumElts; ++i)
7926     ShuffV.push_back(i + WideNumElts);
7927 
7928   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7929   int SaveElts = FourEltRes ? 4 : 2;
7930   if (Subtarget.isLittleEndian())
7931     for (int i = 0; i < SaveElts; i++)
7932       ShuffV[i * Stride] = i;
7933   else
7934     for (int i = 1; i <= SaveElts; i++)
7935       ShuffV[i * Stride - 1] = i - 1;
7936 
7937   SDValue ShuffleSrc2 =
7938       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7939   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7940 
7941   SDValue Extend;
7942   if (SignedConv) {
7943     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
7944     EVT ExtVT = Src.getValueType();
7945     if (Subtarget.hasP9Altivec())
7946       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
7947                                IntermediateVT.getVectorNumElements());
7948 
7949     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
7950                          DAG.getValueType(ExtVT));
7951   } else
7952     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
7953 
7954   if (IsStrict)
7955     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
7956                        {Op.getOperand(0), Extend}, Flags);
7957 
7958   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
7959 }
7960 
7961 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
7962                                           SelectionDAG &DAG) const {
7963   SDLoc dl(Op);
7964   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7965                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7966   bool IsStrict = Op->isStrictFPOpcode();
7967   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7968   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7969 
7970   // TODO: Any other flags to propagate?
7971   SDNodeFlags Flags;
7972   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7973 
7974   EVT InVT = Src.getValueType();
7975   EVT OutVT = Op.getValueType();
7976   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
7977       isOperationCustom(Op.getOpcode(), InVT))
7978     return LowerINT_TO_FPVector(Op, DAG, dl);
7979 
7980   // Conversions to f128 are legal.
7981   if (Op.getValueType() == MVT::f128)
7982     return Subtarget.hasP9Vector() ? Op : SDValue();
7983 
7984   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7985   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
7986     return SDValue();
7987 
7988   if (Src.getValueType() == MVT::i1) {
7989     SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
7990                               DAG.getConstantFP(1.0, dl, Op.getValueType()),
7991                               DAG.getConstantFP(0.0, dl, Op.getValueType()));
7992     if (IsStrict)
7993       return DAG.getMergeValues({Sel, Chain}, dl);
7994     else
7995       return Sel;
7996   }
7997 
7998   // If we have direct moves, we can do all the conversion, skip the store/load
7999   // however, without FPCVT we can't do most conversions.
8000   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8001       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8002     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8003 
8004   assert((IsSigned || Subtarget.hasFPCVT()) &&
8005          "UINT_TO_FP is supported only with FPCVT");
8006 
8007   if (Src.getValueType() == MVT::i64) {
8008     SDValue SINT = Src;
8009     // When converting to single-precision, we actually need to convert
8010     // to double-precision first and then round to single-precision.
8011     // To avoid double-rounding effects during that operation, we have
8012     // to prepare the input operand.  Bits that might be truncated when
8013     // converting to double-precision are replaced by a bit that won't
8014     // be lost at this stage, but is below the single-precision rounding
8015     // position.
8016     //
8017     // However, if -enable-unsafe-fp-math is in effect, accept double
8018     // rounding to avoid the extra overhead.
8019     if (Op.getValueType() == MVT::f32 &&
8020         !Subtarget.hasFPCVT() &&
8021         !DAG.getTarget().Options.UnsafeFPMath) {
8022 
8023       // Twiddle input to make sure the low 11 bits are zero.  (If this
8024       // is the case, we are guaranteed the value will fit into the 53 bit
8025       // mantissa of an IEEE double-precision value without rounding.)
8026       // If any of those low 11 bits were not zero originally, make sure
8027       // bit 12 (value 2048) is set instead, so that the final rounding
8028       // to single-precision gets the correct result.
8029       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8030                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8031       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8032                           Round, DAG.getConstant(2047, dl, MVT::i64));
8033       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8034       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8035                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8036 
8037       // However, we cannot use that value unconditionally: if the magnitude
8038       // of the input value is small, the bit-twiddling we did above might
8039       // end up visibly changing the output.  Fortunately, in that case, we
8040       // don't need to twiddle bits since the original input will convert
8041       // exactly to double-precision floating-point already.  Therefore,
8042       // construct a conditional to use the original value if the top 11
8043       // bits are all sign-bit copies, and use the rounded value computed
8044       // above otherwise.
8045       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8046                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8047       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8048                          Cond, DAG.getConstant(1, dl, MVT::i64));
8049       Cond = DAG.getSetCC(
8050           dl,
8051           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8052           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8053 
8054       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8055     }
8056 
8057     ReuseLoadInfo RLI;
8058     SDValue Bits;
8059 
8060     MachineFunction &MF = DAG.getMachineFunction();
8061     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8062       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8063                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8064       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8065     } else if (Subtarget.hasLFIWAX() &&
8066                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8067       MachineMemOperand *MMO =
8068         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8069                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8070       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8071       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8072                                      DAG.getVTList(MVT::f64, MVT::Other),
8073                                      Ops, MVT::i32, MMO);
8074       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8075     } else if (Subtarget.hasFPCVT() &&
8076                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8077       MachineMemOperand *MMO =
8078         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8079                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8080       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8081       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8082                                      DAG.getVTList(MVT::f64, MVT::Other),
8083                                      Ops, MVT::i32, MMO);
8084       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8085     } else if (((Subtarget.hasLFIWAX() &&
8086                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8087                 (Subtarget.hasFPCVT() &&
8088                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8089                SINT.getOperand(0).getValueType() == MVT::i32) {
8090       MachineFrameInfo &MFI = MF.getFrameInfo();
8091       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8092 
8093       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8094       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8095 
8096       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8097                                    MachinePointerInfo::getFixedStack(
8098                                        DAG.getMachineFunction(), FrameIdx));
8099       Chain = Store;
8100 
8101       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8102              "Expected an i32 store");
8103 
8104       RLI.Ptr = FIdx;
8105       RLI.Chain = Chain;
8106       RLI.MPI =
8107           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8108       RLI.Alignment = Align(4);
8109 
8110       MachineMemOperand *MMO =
8111         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8112                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8113       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8114       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8115                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8116                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8117                                      Ops, MVT::i32, MMO);
8118       Chain = Bits.getValue(1);
8119     } else
8120       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8121 
8122     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8123     if (IsStrict)
8124       Chain = FP.getValue(1);
8125 
8126     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8127       if (IsStrict)
8128         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8129                          DAG.getVTList(MVT::f32, MVT::Other),
8130                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8131       else
8132         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8133                          DAG.getIntPtrConstant(0, dl));
8134     }
8135     return FP;
8136   }
8137 
8138   assert(Src.getValueType() == MVT::i32 &&
8139          "Unhandled INT_TO_FP type in custom expander!");
8140   // Since we only generate this in 64-bit mode, we can take advantage of
8141   // 64-bit registers.  In particular, sign extend the input value into the
8142   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8143   // then lfd it and fcfid it.
8144   MachineFunction &MF = DAG.getMachineFunction();
8145   MachineFrameInfo &MFI = MF.getFrameInfo();
8146   EVT PtrVT = getPointerTy(MF.getDataLayout());
8147 
8148   SDValue Ld;
8149   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8150     ReuseLoadInfo RLI;
8151     bool ReusingLoad;
8152     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8153       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8154       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8155 
8156       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8157                                    MachinePointerInfo::getFixedStack(
8158                                        DAG.getMachineFunction(), FrameIdx));
8159       Chain = Store;
8160 
8161       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8162              "Expected an i32 store");
8163 
8164       RLI.Ptr = FIdx;
8165       RLI.Chain = Chain;
8166       RLI.MPI =
8167           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8168       RLI.Alignment = Align(4);
8169     }
8170 
8171     MachineMemOperand *MMO =
8172       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8173                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8174     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8175     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8176                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8177                                  MVT::i32, MMO);
8178     Chain = Ld.getValue(1);
8179     if (ReusingLoad)
8180       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8181   } else {
8182     assert(Subtarget.isPPC64() &&
8183            "i32->FP without LFIWAX supported only on PPC64");
8184 
8185     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8186     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8187 
8188     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8189 
8190     // STD the extended value into the stack slot.
8191     SDValue Store = DAG.getStore(
8192         Chain, dl, Ext64, FIdx,
8193         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8194     Chain = Store;
8195 
8196     // Load the value as a double.
8197     Ld = DAG.getLoad(
8198         MVT::f64, dl, Chain, FIdx,
8199         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8200     Chain = Ld.getValue(1);
8201   }
8202 
8203   // FCFID it and return it.
8204   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8205   if (IsStrict)
8206     Chain = FP.getValue(1);
8207   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8208     if (IsStrict)
8209       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8210                        DAG.getVTList(MVT::f32, MVT::Other),
8211                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8212     else
8213       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8214                        DAG.getIntPtrConstant(0, dl));
8215   }
8216   return FP;
8217 }
8218 
8219 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8220                                             SelectionDAG &DAG) const {
8221   SDLoc dl(Op);
8222   /*
8223    The rounding mode is in bits 30:31 of FPSR, and has the following
8224    settings:
8225      00 Round to nearest
8226      01 Round to 0
8227      10 Round to +inf
8228      11 Round to -inf
8229 
8230   FLT_ROUNDS, on the other hand, expects the following:
8231     -1 Undefined
8232      0 Round to 0
8233      1 Round to nearest
8234      2 Round to +inf
8235      3 Round to -inf
8236 
8237   To perform the conversion, we do:
8238     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8239   */
8240 
8241   MachineFunction &MF = DAG.getMachineFunction();
8242   EVT VT = Op.getValueType();
8243   EVT PtrVT = getPointerTy(MF.getDataLayout());
8244 
8245   // Save FP Control Word to register
8246   SDValue Chain = Op.getOperand(0);
8247   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8248   Chain = MFFS.getValue(1);
8249 
8250   SDValue CWD;
8251   if (isTypeLegal(MVT::i64)) {
8252     CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8253                       DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8254   } else {
8255     // Save FP register to stack slot
8256     int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8257     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8258     Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8259 
8260     // Load FP Control Word from low 32 bits of stack slot.
8261     assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
8262            "Stack slot adjustment is valid only on big endian subtargets!");
8263     SDValue Four = DAG.getConstant(4, dl, PtrVT);
8264     SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8265     CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8266     Chain = CWD.getValue(1);
8267   }
8268 
8269   // Transform as necessary
8270   SDValue CWD1 =
8271     DAG.getNode(ISD::AND, dl, MVT::i32,
8272                 CWD, DAG.getConstant(3, dl, MVT::i32));
8273   SDValue CWD2 =
8274     DAG.getNode(ISD::SRL, dl, MVT::i32,
8275                 DAG.getNode(ISD::AND, dl, MVT::i32,
8276                             DAG.getNode(ISD::XOR, dl, MVT::i32,
8277                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
8278                             DAG.getConstant(3, dl, MVT::i32)),
8279                 DAG.getConstant(1, dl, MVT::i32));
8280 
8281   SDValue RetVal =
8282     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8283 
8284   RetVal =
8285       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8286                   dl, VT, RetVal);
8287 
8288   return DAG.getMergeValues({RetVal, Chain}, dl);
8289 }
8290 
8291 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8292   EVT VT = Op.getValueType();
8293   unsigned BitWidth = VT.getSizeInBits();
8294   SDLoc dl(Op);
8295   assert(Op.getNumOperands() == 3 &&
8296          VT == Op.getOperand(1).getValueType() &&
8297          "Unexpected SHL!");
8298 
8299   // Expand into a bunch of logical ops.  Note that these ops
8300   // depend on the PPC behavior for oversized shift amounts.
8301   SDValue Lo = Op.getOperand(0);
8302   SDValue Hi = Op.getOperand(1);
8303   SDValue Amt = Op.getOperand(2);
8304   EVT AmtVT = Amt.getValueType();
8305 
8306   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8307                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8308   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8309   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8310   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8311   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8312                              DAG.getConstant(-BitWidth, dl, AmtVT));
8313   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8314   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8315   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8316   SDValue OutOps[] = { OutLo, OutHi };
8317   return DAG.getMergeValues(OutOps, dl);
8318 }
8319 
8320 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8321   EVT VT = Op.getValueType();
8322   SDLoc dl(Op);
8323   unsigned BitWidth = VT.getSizeInBits();
8324   assert(Op.getNumOperands() == 3 &&
8325          VT == Op.getOperand(1).getValueType() &&
8326          "Unexpected SRL!");
8327 
8328   // Expand into a bunch of logical ops.  Note that these ops
8329   // depend on the PPC behavior for oversized shift amounts.
8330   SDValue Lo = Op.getOperand(0);
8331   SDValue Hi = Op.getOperand(1);
8332   SDValue Amt = Op.getOperand(2);
8333   EVT AmtVT = Amt.getValueType();
8334 
8335   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8336                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8337   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8338   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8339   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8340   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8341                              DAG.getConstant(-BitWidth, dl, AmtVT));
8342   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8343   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8344   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8345   SDValue OutOps[] = { OutLo, OutHi };
8346   return DAG.getMergeValues(OutOps, dl);
8347 }
8348 
8349 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8350   SDLoc dl(Op);
8351   EVT VT = Op.getValueType();
8352   unsigned BitWidth = VT.getSizeInBits();
8353   assert(Op.getNumOperands() == 3 &&
8354          VT == Op.getOperand(1).getValueType() &&
8355          "Unexpected SRA!");
8356 
8357   // Expand into a bunch of logical ops, followed by a select_cc.
8358   SDValue Lo = Op.getOperand(0);
8359   SDValue Hi = Op.getOperand(1);
8360   SDValue Amt = Op.getOperand(2);
8361   EVT AmtVT = Amt.getValueType();
8362 
8363   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8364                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8365   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8366   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8367   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8368   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8369                              DAG.getConstant(-BitWidth, dl, AmtVT));
8370   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8371   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8372   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8373                                   Tmp4, Tmp6, ISD::SETLE);
8374   SDValue OutOps[] = { OutLo, OutHi };
8375   return DAG.getMergeValues(OutOps, dl);
8376 }
8377 
8378 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8379                                             SelectionDAG &DAG) const {
8380   SDLoc dl(Op);
8381   EVT VT = Op.getValueType();
8382   unsigned BitWidth = VT.getSizeInBits();
8383 
8384   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8385   SDValue X = Op.getOperand(0);
8386   SDValue Y = Op.getOperand(1);
8387   SDValue Z = Op.getOperand(2);
8388   EVT AmtVT = Z.getValueType();
8389 
8390   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8391   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8392   // This is simpler than TargetLowering::expandFunnelShift because we can rely
8393   // on PowerPC shift by BW being well defined.
8394   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8395                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
8396   SDValue SubZ =
8397       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8398   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8399   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8400   return DAG.getNode(ISD::OR, dl, VT, X, Y);
8401 }
8402 
8403 //===----------------------------------------------------------------------===//
8404 // Vector related lowering.
8405 //
8406 
8407 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8408 /// element size of SplatSize. Cast the result to VT.
8409 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8410                                       SelectionDAG &DAG, const SDLoc &dl) {
8411   static const MVT VTys[] = { // canonical VT to use for each size.
8412     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8413   };
8414 
8415   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8416 
8417   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8418   if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8419     SplatSize = 1;
8420     Val = 0xFF;
8421   }
8422 
8423   EVT CanonicalVT = VTys[SplatSize-1];
8424 
8425   // Build a canonical splat for this value.
8426   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8427 }
8428 
8429 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8430 /// specified intrinsic ID.
8431 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8432                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
8433   if (DestVT == MVT::Other) DestVT = Op.getValueType();
8434   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8435                      DAG.getConstant(IID, dl, MVT::i32), Op);
8436 }
8437 
8438 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8439 /// specified intrinsic ID.
8440 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8441                                 SelectionDAG &DAG, const SDLoc &dl,
8442                                 EVT DestVT = MVT::Other) {
8443   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8444   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8445                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8446 }
8447 
8448 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8449 /// specified intrinsic ID.
8450 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8451                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8452                                 EVT DestVT = MVT::Other) {
8453   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8454   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8455                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8456 }
8457 
8458 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8459 /// amount.  The result has the specified value type.
8460 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8461                            SelectionDAG &DAG, const SDLoc &dl) {
8462   // Force LHS/RHS to be the right type.
8463   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8464   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8465 
8466   int Ops[16];
8467   for (unsigned i = 0; i != 16; ++i)
8468     Ops[i] = i + Amt;
8469   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8470   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8471 }
8472 
8473 /// Do we have an efficient pattern in a .td file for this node?
8474 ///
8475 /// \param V - pointer to the BuildVectorSDNode being matched
8476 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8477 ///
8478 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8479 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8480 /// the opposite is true (expansion is beneficial) are:
8481 /// - The node builds a vector out of integers that are not 32 or 64-bits
8482 /// - The node builds a vector out of constants
8483 /// - The node is a "load-and-splat"
8484 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8485 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8486                                             bool HasDirectMove,
8487                                             bool HasP8Vector) {
8488   EVT VecVT = V->getValueType(0);
8489   bool RightType = VecVT == MVT::v2f64 ||
8490     (HasP8Vector && VecVT == MVT::v4f32) ||
8491     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8492   if (!RightType)
8493     return false;
8494 
8495   bool IsSplat = true;
8496   bool IsLoad = false;
8497   SDValue Op0 = V->getOperand(0);
8498 
8499   // This function is called in a block that confirms the node is not a constant
8500   // splat. So a constant BUILD_VECTOR here means the vector is built out of
8501   // different constants.
8502   if (V->isConstant())
8503     return false;
8504   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8505     if (V->getOperand(i).isUndef())
8506       return false;
8507     // We want to expand nodes that represent load-and-splat even if the
8508     // loaded value is a floating point truncation or conversion to int.
8509     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8510         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8511          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8512         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8513          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8514         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8515          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8516       IsLoad = true;
8517     // If the operands are different or the input is not a load and has more
8518     // uses than just this BV node, then it isn't a splat.
8519     if (V->getOperand(i) != Op0 ||
8520         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8521       IsSplat = false;
8522   }
8523   return !(IsSplat && IsLoad);
8524 }
8525 
8526 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8527 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8528 
8529   SDLoc dl(Op);
8530   SDValue Op0 = Op->getOperand(0);
8531 
8532   if ((Op.getValueType() != MVT::f128) ||
8533       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8534       (Op0.getOperand(0).getValueType() != MVT::i64) ||
8535       (Op0.getOperand(1).getValueType() != MVT::i64))
8536     return SDValue();
8537 
8538   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8539                      Op0.getOperand(1));
8540 }
8541 
8542 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
8543   const SDValue *InputLoad = &Op;
8544   if (InputLoad->getOpcode() == ISD::BITCAST)
8545     InputLoad = &InputLoad->getOperand(0);
8546   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
8547       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
8548     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
8549     InputLoad = &InputLoad->getOperand(0);
8550   }
8551   if (InputLoad->getOpcode() != ISD::LOAD)
8552     return nullptr;
8553   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8554   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
8555 }
8556 
8557 // Convert the argument APFloat to a single precision APFloat if there is no
8558 // loss in information during the conversion to single precision APFloat and the
8559 // resulting number is not a denormal number. Return true if successful.
8560 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
8561   APFloat APFloatToConvert = ArgAPFloat;
8562   bool LosesInfo = true;
8563   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
8564                            &LosesInfo);
8565   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
8566   if (Success)
8567     ArgAPFloat = APFloatToConvert;
8568   return Success;
8569 }
8570 
8571 // Bitcast the argument APInt to a double and convert it to a single precision
8572 // APFloat, bitcast the APFloat to an APInt and assign it to the original
8573 // argument if there is no loss in information during the conversion from
8574 // double to single precision APFloat and the resulting number is not a denormal
8575 // number. Return true if successful.
8576 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
8577   double DpValue = ArgAPInt.bitsToDouble();
8578   APFloat APFloatDp(DpValue);
8579   bool Success = convertToNonDenormSingle(APFloatDp);
8580   if (Success)
8581     ArgAPInt = APFloatDp.bitcastToAPInt();
8582   return Success;
8583 }
8584 
8585 // If this is a case we can't handle, return null and let the default
8586 // expansion code take care of it.  If we CAN select this case, and if it
8587 // selects to a single instruction, return Op.  Otherwise, if we can codegen
8588 // this case more efficiently than a constant pool load, lower it to the
8589 // sequence of ops that should be used.
8590 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8591                                              SelectionDAG &DAG) const {
8592   SDLoc dl(Op);
8593   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8594   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8595 
8596   // Check if this is a splat of a constant value.
8597   APInt APSplatBits, APSplatUndef;
8598   unsigned SplatBitSize;
8599   bool HasAnyUndefs;
8600   bool BVNIsConstantSplat =
8601       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8602                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
8603 
8604   // If it is a splat of a double, check if we can shrink it to a 32 bit
8605   // non-denormal float which when converted back to double gives us the same
8606   // double. This is to exploit the XXSPLTIDP instruction.
8607   // If we lose precision, we use XXSPLTI32DX.
8608   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
8609       Subtarget.hasPrefixInstrs()) {
8610     // Check the type first to short-circuit so we don't modify APSplatBits if
8611     // this block isn't executed.
8612     if ((Op->getValueType(0) == MVT::v2f64) &&
8613         convertToNonDenormSingle(APSplatBits)) {
8614       SDValue SplatNode = DAG.getNode(
8615           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
8616           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
8617       return DAG.getBitcast(Op.getValueType(), SplatNode);
8618     } else {
8619       // We may lose precision, so we have to use XXSPLTI32DX.
8620 
8621       uint32_t Hi =
8622           (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
8623       uint32_t Lo =
8624           (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
8625       SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
8626 
8627       if (!Hi || !Lo)
8628         // If either load is 0, then we should generate XXLXOR to set to 0.
8629         SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
8630 
8631       if (Hi)
8632         SplatNode = DAG.getNode(
8633             PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8634             DAG.getTargetConstant(0, dl, MVT::i32),
8635             DAG.getTargetConstant(Hi, dl, MVT::i32));
8636 
8637       if (Lo)
8638         SplatNode =
8639             DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8640                         DAG.getTargetConstant(1, dl, MVT::i32),
8641                         DAG.getTargetConstant(Lo, dl, MVT::i32));
8642 
8643       return DAG.getBitcast(Op.getValueType(), SplatNode);
8644     }
8645   }
8646 
8647   if (!BVNIsConstantSplat || SplatBitSize > 32) {
8648 
8649     bool IsPermutedLoad = false;
8650     const SDValue *InputLoad =
8651         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
8652     // Handle load-and-splat patterns as we have instructions that will do this
8653     // in one go.
8654     if (InputLoad && DAG.isSplatValue(Op, true)) {
8655       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8656 
8657       // We have handling for 4 and 8 byte elements.
8658       unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
8659 
8660       // Checking for a single use of this load, we have to check for vector
8661       // width (128 bits) / ElementSize uses (since each operand of the
8662       // BUILD_VECTOR is a separate use of the value.
8663       unsigned NumUsesOfInputLD = 128 / ElementSize;
8664       for (SDValue BVInOp : Op->ops())
8665         if (BVInOp.isUndef())
8666           NumUsesOfInputLD--;
8667       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
8668       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
8669           ((Subtarget.hasVSX() && ElementSize == 64) ||
8670            (Subtarget.hasP9Vector() && ElementSize == 32))) {
8671         SDValue Ops[] = {
8672           LD->getChain(),    // Chain
8673           LD->getBasePtr(),  // Ptr
8674           DAG.getValueType(Op.getValueType()) // VT
8675         };
8676         SDValue LdSplt = DAG.getMemIntrinsicNode(
8677             PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8678             Ops, LD->getMemoryVT(), LD->getMemOperand());
8679         // Replace all uses of the output chain of the original load with the
8680         // output chain of the new load.
8681         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
8682                                       LdSplt.getValue(1));
8683         return LdSplt;
8684       }
8685     }
8686 
8687     // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
8688     // 32-bits can be lowered to VSX instructions under certain conditions.
8689     // Without VSX, there is no pattern more efficient than expanding the node.
8690     if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
8691         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8692                                         Subtarget.hasP8Vector()))
8693       return Op;
8694     return SDValue();
8695   }
8696 
8697   uint64_t SplatBits = APSplatBits.getZExtValue();
8698   uint64_t SplatUndef = APSplatUndef.getZExtValue();
8699   unsigned SplatSize = SplatBitSize / 8;
8700 
8701   // First, handle single instruction cases.
8702 
8703   // All zeros?
8704   if (SplatBits == 0) {
8705     // Canonicalize all zero vectors to be v4i32.
8706     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8707       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8708       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8709     }
8710     return Op;
8711   }
8712 
8713   // We have XXSPLTIW for constant splats four bytes wide.
8714   // Given vector length is a multiple of 4, 2-byte splats can be replaced
8715   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
8716   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
8717   // turned into a 4-byte splat of 0xABABABAB.
8718   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
8719     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
8720                                   Op.getValueType(), DAG, dl);
8721 
8722   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
8723     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8724                                   dl);
8725 
8726   // We have XXSPLTIB for constant splats one byte wide.
8727   if (Subtarget.hasP9Vector() && SplatSize == 1)
8728     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8729                                   dl);
8730 
8731   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8732   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8733                     (32-SplatBitSize));
8734   if (SextVal >= -16 && SextVal <= 15)
8735     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
8736                                   dl);
8737 
8738   // Two instruction sequences.
8739 
8740   // If this value is in the range [-32,30] and is even, use:
8741   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8742   // If this value is in the range [17,31] and is odd, use:
8743   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8744   // If this value is in the range [-31,-17] and is odd, use:
8745   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8746   // Note the last two are three-instruction sequences.
8747   if (SextVal >= -32 && SextVal <= 31) {
8748     // To avoid having these optimizations undone by constant folding,
8749     // we convert to a pseudo that will be expanded later into one of
8750     // the above forms.
8751     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8752     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8753               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8754     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8755     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8756     if (VT == Op.getValueType())
8757       return RetVal;
8758     else
8759       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8760   }
8761 
8762   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
8763   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
8764   // for fneg/fabs.
8765   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8766     // Make -1 and vspltisw -1:
8767     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
8768 
8769     // Make the VSLW intrinsic, computing 0x8000_0000.
8770     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8771                                    OnesV, DAG, dl);
8772 
8773     // xor by OnesV to invert it.
8774     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8775     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8776   }
8777 
8778   // Check to see if this is a wide variety of vsplti*, binop self cases.
8779   static const signed char SplatCsts[] = {
8780     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8781     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8782   };
8783 
8784   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8785     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8786     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
8787     int i = SplatCsts[idx];
8788 
8789     // Figure out what shift amount will be used by altivec if shifted by i in
8790     // this splat size.
8791     unsigned TypeShiftAmt = i & (SplatBitSize-1);
8792 
8793     // vsplti + shl self.
8794     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8795       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8796       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8797         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8798         Intrinsic::ppc_altivec_vslw
8799       };
8800       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8801       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8802     }
8803 
8804     // vsplti + srl self.
8805     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8806       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8807       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8808         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8809         Intrinsic::ppc_altivec_vsrw
8810       };
8811       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8812       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8813     }
8814 
8815     // vsplti + rol self.
8816     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8817                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8818       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8819       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8820         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8821         Intrinsic::ppc_altivec_vrlw
8822       };
8823       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8824       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8825     }
8826 
8827     // t = vsplti c, result = vsldoi t, t, 1
8828     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8829       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8830       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8831       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8832     }
8833     // t = vsplti c, result = vsldoi t, t, 2
8834     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8835       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8836       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8837       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8838     }
8839     // t = vsplti c, result = vsldoi t, t, 3
8840     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8841       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8842       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8843       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8844     }
8845   }
8846 
8847   return SDValue();
8848 }
8849 
8850 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8851 /// the specified operations to build the shuffle.
8852 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8853                                       SDValue RHS, SelectionDAG &DAG,
8854                                       const SDLoc &dl) {
8855   unsigned OpNum = (PFEntry >> 26) & 0x0F;
8856   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8857   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
8858 
8859   enum {
8860     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8861     OP_VMRGHW,
8862     OP_VMRGLW,
8863     OP_VSPLTISW0,
8864     OP_VSPLTISW1,
8865     OP_VSPLTISW2,
8866     OP_VSPLTISW3,
8867     OP_VSLDOI4,
8868     OP_VSLDOI8,
8869     OP_VSLDOI12
8870   };
8871 
8872   if (OpNum == OP_COPY) {
8873     if (LHSID == (1*9+2)*9+3) return LHS;
8874     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8875     return RHS;
8876   }
8877 
8878   SDValue OpLHS, OpRHS;
8879   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8880   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8881 
8882   int ShufIdxs[16];
8883   switch (OpNum) {
8884   default: llvm_unreachable("Unknown i32 permute!");
8885   case OP_VMRGHW:
8886     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
8887     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8888     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
8889     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8890     break;
8891   case OP_VMRGLW:
8892     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8893     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8894     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8895     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8896     break;
8897   case OP_VSPLTISW0:
8898     for (unsigned i = 0; i != 16; ++i)
8899       ShufIdxs[i] = (i&3)+0;
8900     break;
8901   case OP_VSPLTISW1:
8902     for (unsigned i = 0; i != 16; ++i)
8903       ShufIdxs[i] = (i&3)+4;
8904     break;
8905   case OP_VSPLTISW2:
8906     for (unsigned i = 0; i != 16; ++i)
8907       ShufIdxs[i] = (i&3)+8;
8908     break;
8909   case OP_VSPLTISW3:
8910     for (unsigned i = 0; i != 16; ++i)
8911       ShufIdxs[i] = (i&3)+12;
8912     break;
8913   case OP_VSLDOI4:
8914     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8915   case OP_VSLDOI8:
8916     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8917   case OP_VSLDOI12:
8918     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8919   }
8920   EVT VT = OpLHS.getValueType();
8921   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8922   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8923   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8924   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8925 }
8926 
8927 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8928 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8929 /// SDValue.
8930 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8931                                            SelectionDAG &DAG) const {
8932   const unsigned BytesInVector = 16;
8933   bool IsLE = Subtarget.isLittleEndian();
8934   SDLoc dl(N);
8935   SDValue V1 = N->getOperand(0);
8936   SDValue V2 = N->getOperand(1);
8937   unsigned ShiftElts = 0, InsertAtByte = 0;
8938   bool Swap = false;
8939 
8940   // Shifts required to get the byte we want at element 7.
8941   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
8942                                    0, 15, 14, 13, 12, 11, 10, 9};
8943   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
8944                                 1, 2,  3,  4,  5,  6,  7,  8};
8945 
8946   ArrayRef<int> Mask = N->getMask();
8947   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8948 
8949   // For each mask element, find out if we're just inserting something
8950   // from V2 into V1 or vice versa.
8951   // Possible permutations inserting an element from V2 into V1:
8952   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8953   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8954   //   ...
8955   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8956   // Inserting from V1 into V2 will be similar, except mask range will be
8957   // [16,31].
8958 
8959   bool FoundCandidate = false;
8960   // If both vector operands for the shuffle are the same vector, the mask
8961   // will contain only elements from the first one and the second one will be
8962   // undef.
8963   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
8964   // Go through the mask of half-words to find an element that's being moved
8965   // from one vector to the other.
8966   for (unsigned i = 0; i < BytesInVector; ++i) {
8967     unsigned CurrentElement = Mask[i];
8968     // If 2nd operand is undefined, we should only look for element 7 in the
8969     // Mask.
8970     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
8971       continue;
8972 
8973     bool OtherElementsInOrder = true;
8974     // Examine the other elements in the Mask to see if they're in original
8975     // order.
8976     for (unsigned j = 0; j < BytesInVector; ++j) {
8977       if (j == i)
8978         continue;
8979       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
8980       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
8981       // in which we always assume we're always picking from the 1st operand.
8982       int MaskOffset =
8983           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
8984       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
8985         OtherElementsInOrder = false;
8986         break;
8987       }
8988     }
8989     // If other elements are in original order, we record the number of shifts
8990     // we need to get the element we want into element 7. Also record which byte
8991     // in the vector we should insert into.
8992     if (OtherElementsInOrder) {
8993       // If 2nd operand is undefined, we assume no shifts and no swapping.
8994       if (V2.isUndef()) {
8995         ShiftElts = 0;
8996         Swap = false;
8997       } else {
8998         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8999         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9000                          : BigEndianShifts[CurrentElement & 0xF];
9001         Swap = CurrentElement < BytesInVector;
9002       }
9003       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9004       FoundCandidate = true;
9005       break;
9006     }
9007   }
9008 
9009   if (!FoundCandidate)
9010     return SDValue();
9011 
9012   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9013   // optionally with VECSHL if shift is required.
9014   if (Swap)
9015     std::swap(V1, V2);
9016   if (V2.isUndef())
9017     V2 = V1;
9018   if (ShiftElts) {
9019     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9020                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9021     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9022                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9023   }
9024   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9025                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9026 }
9027 
9028 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9029 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9030 /// SDValue.
9031 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9032                                            SelectionDAG &DAG) const {
9033   const unsigned NumHalfWords = 8;
9034   const unsigned BytesInVector = NumHalfWords * 2;
9035   // Check that the shuffle is on half-words.
9036   if (!isNByteElemShuffleMask(N, 2, 1))
9037     return SDValue();
9038 
9039   bool IsLE = Subtarget.isLittleEndian();
9040   SDLoc dl(N);
9041   SDValue V1 = N->getOperand(0);
9042   SDValue V2 = N->getOperand(1);
9043   unsigned ShiftElts = 0, InsertAtByte = 0;
9044   bool Swap = false;
9045 
9046   // Shifts required to get the half-word we want at element 3.
9047   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9048   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9049 
9050   uint32_t Mask = 0;
9051   uint32_t OriginalOrderLow = 0x1234567;
9052   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9053   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9054   // 32-bit space, only need 4-bit nibbles per element.
9055   for (unsigned i = 0; i < NumHalfWords; ++i) {
9056     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9057     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9058   }
9059 
9060   // For each mask element, find out if we're just inserting something
9061   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9062   // from V2 into V1:
9063   //   X, 1, 2, 3, 4, 5, 6, 7
9064   //   0, X, 2, 3, 4, 5, 6, 7
9065   //   0, 1, X, 3, 4, 5, 6, 7
9066   //   0, 1, 2, X, 4, 5, 6, 7
9067   //   0, 1, 2, 3, X, 5, 6, 7
9068   //   0, 1, 2, 3, 4, X, 6, 7
9069   //   0, 1, 2, 3, 4, 5, X, 7
9070   //   0, 1, 2, 3, 4, 5, 6, X
9071   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9072 
9073   bool FoundCandidate = false;
9074   // Go through the mask of half-words to find an element that's being moved
9075   // from one vector to the other.
9076   for (unsigned i = 0; i < NumHalfWords; ++i) {
9077     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9078     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9079     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9080     uint32_t TargetOrder = 0x0;
9081 
9082     // If both vector operands for the shuffle are the same vector, the mask
9083     // will contain only elements from the first one and the second one will be
9084     // undef.
9085     if (V2.isUndef()) {
9086       ShiftElts = 0;
9087       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9088       TargetOrder = OriginalOrderLow;
9089       Swap = false;
9090       // Skip if not the correct element or mask of other elements don't equal
9091       // to our expected order.
9092       if (MaskOneElt == VINSERTHSrcElem &&
9093           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9094         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9095         FoundCandidate = true;
9096         break;
9097       }
9098     } else { // If both operands are defined.
9099       // Target order is [8,15] if the current mask is between [0,7].
9100       TargetOrder =
9101           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9102       // Skip if mask of other elements don't equal our expected order.
9103       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9104         // We only need the last 3 bits for the number of shifts.
9105         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9106                          : BigEndianShifts[MaskOneElt & 0x7];
9107         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9108         Swap = MaskOneElt < NumHalfWords;
9109         FoundCandidate = true;
9110         break;
9111       }
9112     }
9113   }
9114 
9115   if (!FoundCandidate)
9116     return SDValue();
9117 
9118   // Candidate found, construct the proper SDAG sequence with VINSERTH,
9119   // optionally with VECSHL if shift is required.
9120   if (Swap)
9121     std::swap(V1, V2);
9122   if (V2.isUndef())
9123     V2 = V1;
9124   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9125   if (ShiftElts) {
9126     // Double ShiftElts because we're left shifting on v16i8 type.
9127     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9128                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9129     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9130     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9131                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9132     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9133   }
9134   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9135   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9136                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
9137   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9138 }
9139 
9140 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9141 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9142 /// return the default SDValue.
9143 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9144                                               SelectionDAG &DAG) const {
9145   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9146   // to v16i8. Peek through the bitcasts to get the actual operands.
9147   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9148   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9149 
9150   auto ShuffleMask = SVN->getMask();
9151   SDValue VecShuffle(SVN, 0);
9152   SDLoc DL(SVN);
9153 
9154   // Check that we have a four byte shuffle.
9155   if (!isNByteElemShuffleMask(SVN, 4, 1))
9156     return SDValue();
9157 
9158   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9159   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9160     std::swap(LHS, RHS);
9161     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9162     ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9163   }
9164 
9165   // Ensure that the RHS is a vector of constants.
9166   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9167   if (!BVN)
9168     return SDValue();
9169 
9170   // Check if RHS is a splat of 4-bytes (or smaller).
9171   APInt APSplatValue, APSplatUndef;
9172   unsigned SplatBitSize;
9173   bool HasAnyUndefs;
9174   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9175                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9176       SplatBitSize > 32)
9177     return SDValue();
9178 
9179   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9180   // The instruction splats a constant C into two words of the source vector
9181   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9182   // Thus we check that the shuffle mask is the equivalent  of
9183   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9184   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9185   // within each word are consecutive, so we only need to check the first byte.
9186   SDValue Index;
9187   bool IsLE = Subtarget.isLittleEndian();
9188   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9189       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9190        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9191     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9192   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9193            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9194             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9195     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9196   else
9197     return SDValue();
9198 
9199   // If the splat is narrower than 32-bits, we need to get the 32-bit value
9200   // for XXSPLTI32DX.
9201   unsigned SplatVal = APSplatValue.getZExtValue();
9202   for (; SplatBitSize < 32; SplatBitSize <<= 1)
9203     SplatVal |= (SplatVal << SplatBitSize);
9204 
9205   SDValue SplatNode = DAG.getNode(
9206       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9207       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9208   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9209 }
9210 
9211 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9212 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9213 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9214 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9215 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9216   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9217   assert(Op.getValueType() == MVT::v1i128 &&
9218          "Only set v1i128 as custom, other type shouldn't reach here!");
9219   SDLoc dl(Op);
9220   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9221   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9222   unsigned SHLAmt = N1.getConstantOperandVal(0);
9223   if (SHLAmt % 8 == 0) {
9224     SmallVector<int, 16> Mask(16, 0);
9225     std::iota(Mask.begin(), Mask.end(), 0);
9226     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9227     if (SDValue Shuffle =
9228             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9229                                  DAG.getUNDEF(MVT::v16i8), Mask))
9230       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9231   }
9232   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9233   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9234                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9235   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9236                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9237   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9238   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9239 }
9240 
9241 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9242 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9243 /// return the code it can be lowered into.  Worst case, it can always be
9244 /// lowered into a vperm.
9245 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9246                                                SelectionDAG &DAG) const {
9247   SDLoc dl(Op);
9248   SDValue V1 = Op.getOperand(0);
9249   SDValue V2 = Op.getOperand(1);
9250   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9251 
9252   // Any nodes that were combined in the target-independent combiner prior
9253   // to vector legalization will not be sent to the target combine. Try to
9254   // combine it here.
9255   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9256     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9257       return NewShuffle;
9258     Op = NewShuffle;
9259     SVOp = cast<ShuffleVectorSDNode>(Op);
9260     V1 = Op.getOperand(0);
9261     V2 = Op.getOperand(1);
9262   }
9263   EVT VT = Op.getValueType();
9264   bool isLittleEndian = Subtarget.isLittleEndian();
9265 
9266   unsigned ShiftElts, InsertAtByte;
9267   bool Swap = false;
9268 
9269   // If this is a load-and-splat, we can do that with a single instruction
9270   // in some cases. However if the load has multiple uses, we don't want to
9271   // combine it because that will just produce multiple loads.
9272   bool IsPermutedLoad = false;
9273   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9274   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9275       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9276       InputLoad->hasOneUse()) {
9277     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9278     int SplatIdx =
9279       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9280 
9281     // The splat index for permuted loads will be in the left half of the vector
9282     // which is strictly wider than the loaded value by 8 bytes. So we need to
9283     // adjust the splat index to point to the correct address in memory.
9284     if (IsPermutedLoad) {
9285       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9286       SplatIdx += IsFourByte ? 2 : 1;
9287       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9288              "Splat of a value outside of the loaded memory");
9289     }
9290 
9291     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9292     // For 4-byte load-and-splat, we need Power9.
9293     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9294       uint64_t Offset = 0;
9295       if (IsFourByte)
9296         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9297       else
9298         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9299 
9300       SDValue BasePtr = LD->getBasePtr();
9301       if (Offset != 0)
9302         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9303                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9304       SDValue Ops[] = {
9305         LD->getChain(),    // Chain
9306         BasePtr,           // BasePtr
9307         DAG.getValueType(Op.getValueType()) // VT
9308       };
9309       SDVTList VTL =
9310         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9311       SDValue LdSplt =
9312         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9313                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
9314       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9315       if (LdSplt.getValueType() != SVOp->getValueType(0))
9316         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9317       return LdSplt;
9318     }
9319   }
9320   if (Subtarget.hasP9Vector() &&
9321       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9322                            isLittleEndian)) {
9323     if (Swap)
9324       std::swap(V1, V2);
9325     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9326     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9327     if (ShiftElts) {
9328       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9329                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
9330       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9331                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9332       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9333     }
9334     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9335                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9336     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9337   }
9338 
9339   if (Subtarget.hasPrefixInstrs()) {
9340     SDValue SplatInsertNode;
9341     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9342       return SplatInsertNode;
9343   }
9344 
9345   if (Subtarget.hasP9Altivec()) {
9346     SDValue NewISDNode;
9347     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9348       return NewISDNode;
9349 
9350     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9351       return NewISDNode;
9352   }
9353 
9354   if (Subtarget.hasVSX() &&
9355       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9356     if (Swap)
9357       std::swap(V1, V2);
9358     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9359     SDValue Conv2 =
9360         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9361 
9362     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9363                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9364     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9365   }
9366 
9367   if (Subtarget.hasVSX() &&
9368     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9369     if (Swap)
9370       std::swap(V1, V2);
9371     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9372     SDValue Conv2 =
9373         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9374 
9375     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9376                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9377     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9378   }
9379 
9380   if (Subtarget.hasP9Vector()) {
9381      if (PPC::isXXBRHShuffleMask(SVOp)) {
9382       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9383       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9384       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9385     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9386       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9387       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9388       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9389     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9390       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9391       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9392       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9393     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9394       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9395       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9396       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9397     }
9398   }
9399 
9400   if (Subtarget.hasVSX()) {
9401     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9402       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9403 
9404       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9405       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9406                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
9407       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9408     }
9409 
9410     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9411     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9412       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9413       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9414       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9415     }
9416   }
9417 
9418   // Cases that are handled by instructions that take permute immediates
9419   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9420   // selected by the instruction selector.
9421   if (V2.isUndef()) {
9422     if (PPC::isSplatShuffleMask(SVOp, 1) ||
9423         PPC::isSplatShuffleMask(SVOp, 2) ||
9424         PPC::isSplatShuffleMask(SVOp, 4) ||
9425         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9426         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9427         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9428         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9429         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9430         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9431         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9432         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9433         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9434         (Subtarget.hasP8Altivec() && (
9435          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9436          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9437          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9438       return Op;
9439     }
9440   }
9441 
9442   // Altivec has a variety of "shuffle immediates" that take two vector inputs
9443   // and produce a fixed permutation.  If any of these match, do not lower to
9444   // VPERM.
9445   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9446   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9447       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9448       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9449       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9450       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9451       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9452       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9453       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9454       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9455       (Subtarget.hasP8Altivec() && (
9456        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9457        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9458        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9459     return Op;
9460 
9461   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
9462   // perfect shuffle table to emit an optimal matching sequence.
9463   ArrayRef<int> PermMask = SVOp->getMask();
9464 
9465   unsigned PFIndexes[4];
9466   bool isFourElementShuffle = true;
9467   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9468     unsigned EltNo = 8;   // Start out undef.
9469     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
9470       if (PermMask[i*4+j] < 0)
9471         continue;   // Undef, ignore it.
9472 
9473       unsigned ByteSource = PermMask[i*4+j];
9474       if ((ByteSource & 3) != j) {
9475         isFourElementShuffle = false;
9476         break;
9477       }
9478 
9479       if (EltNo == 8) {
9480         EltNo = ByteSource/4;
9481       } else if (EltNo != ByteSource/4) {
9482         isFourElementShuffle = false;
9483         break;
9484       }
9485     }
9486     PFIndexes[i] = EltNo;
9487   }
9488 
9489   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9490   // perfect shuffle vector to determine if it is cost effective to do this as
9491   // discrete instructions, or whether we should use a vperm.
9492   // For now, we skip this for little endian until such time as we have a
9493   // little-endian perfect shuffle table.
9494   if (isFourElementShuffle && !isLittleEndian) {
9495     // Compute the index in the perfect shuffle table.
9496     unsigned PFTableIndex =
9497       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9498 
9499     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9500     unsigned Cost  = (PFEntry >> 30);
9501 
9502     // Determining when to avoid vperm is tricky.  Many things affect the cost
9503     // of vperm, particularly how many times the perm mask needs to be computed.
9504     // For example, if the perm mask can be hoisted out of a loop or is already
9505     // used (perhaps because there are multiple permutes with the same shuffle
9506     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
9507     // the loop requires an extra register.
9508     //
9509     // As a compromise, we only emit discrete instructions if the shuffle can be
9510     // generated in 3 or fewer operations.  When we have loop information
9511     // available, if this block is within a loop, we should avoid using vperm
9512     // for 3-operation perms and use a constant pool load instead.
9513     if (Cost < 3)
9514       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9515   }
9516 
9517   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9518   // vector that will get spilled to the constant pool.
9519   if (V2.isUndef()) V2 = V1;
9520 
9521   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9522   // that it is in input element units, not in bytes.  Convert now.
9523 
9524   // For little endian, the order of the input vectors is reversed, and
9525   // the permutation mask is complemented with respect to 31.  This is
9526   // necessary to produce proper semantics with the big-endian-biased vperm
9527   // instruction.
9528   EVT EltVT = V1.getValueType().getVectorElementType();
9529   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9530 
9531   SmallVector<SDValue, 16> ResultMask;
9532   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9533     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9534 
9535     for (unsigned j = 0; j != BytesPerElement; ++j)
9536       if (isLittleEndian)
9537         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9538                                              dl, MVT::i32));
9539       else
9540         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9541                                              MVT::i32));
9542   }
9543 
9544   ShufflesHandledWithVPERM++;
9545   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9546   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
9547   LLVM_DEBUG(SVOp->dump());
9548   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
9549   LLVM_DEBUG(VPermMask.dump());
9550 
9551   if (isLittleEndian)
9552     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9553                        V2, V1, VPermMask);
9554   else
9555     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9556                        V1, V2, VPermMask);
9557 }
9558 
9559 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9560 /// vector comparison.  If it is, return true and fill in Opc/isDot with
9561 /// information about the intrinsic.
9562 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9563                                  bool &isDot, const PPCSubtarget &Subtarget) {
9564   unsigned IntrinsicID =
9565       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9566   CompareOpc = -1;
9567   isDot = false;
9568   switch (IntrinsicID) {
9569   default:
9570     return false;
9571   // Comparison predicates.
9572   case Intrinsic::ppc_altivec_vcmpbfp_p:
9573     CompareOpc = 966;
9574     isDot = true;
9575     break;
9576   case Intrinsic::ppc_altivec_vcmpeqfp_p:
9577     CompareOpc = 198;
9578     isDot = true;
9579     break;
9580   case Intrinsic::ppc_altivec_vcmpequb_p:
9581     CompareOpc = 6;
9582     isDot = true;
9583     break;
9584   case Intrinsic::ppc_altivec_vcmpequh_p:
9585     CompareOpc = 70;
9586     isDot = true;
9587     break;
9588   case Intrinsic::ppc_altivec_vcmpequw_p:
9589     CompareOpc = 134;
9590     isDot = true;
9591     break;
9592   case Intrinsic::ppc_altivec_vcmpequd_p:
9593     if (Subtarget.hasP8Altivec()) {
9594       CompareOpc = 199;
9595       isDot = true;
9596     } else
9597       return false;
9598     break;
9599   case Intrinsic::ppc_altivec_vcmpneb_p:
9600   case Intrinsic::ppc_altivec_vcmpneh_p:
9601   case Intrinsic::ppc_altivec_vcmpnew_p:
9602   case Intrinsic::ppc_altivec_vcmpnezb_p:
9603   case Intrinsic::ppc_altivec_vcmpnezh_p:
9604   case Intrinsic::ppc_altivec_vcmpnezw_p:
9605     if (Subtarget.hasP9Altivec()) {
9606       switch (IntrinsicID) {
9607       default:
9608         llvm_unreachable("Unknown comparison intrinsic.");
9609       case Intrinsic::ppc_altivec_vcmpneb_p:
9610         CompareOpc = 7;
9611         break;
9612       case Intrinsic::ppc_altivec_vcmpneh_p:
9613         CompareOpc = 71;
9614         break;
9615       case Intrinsic::ppc_altivec_vcmpnew_p:
9616         CompareOpc = 135;
9617         break;
9618       case Intrinsic::ppc_altivec_vcmpnezb_p:
9619         CompareOpc = 263;
9620         break;
9621       case Intrinsic::ppc_altivec_vcmpnezh_p:
9622         CompareOpc = 327;
9623         break;
9624       case Intrinsic::ppc_altivec_vcmpnezw_p:
9625         CompareOpc = 391;
9626         break;
9627       }
9628       isDot = true;
9629     } else
9630       return false;
9631     break;
9632   case Intrinsic::ppc_altivec_vcmpgefp_p:
9633     CompareOpc = 454;
9634     isDot = true;
9635     break;
9636   case Intrinsic::ppc_altivec_vcmpgtfp_p:
9637     CompareOpc = 710;
9638     isDot = true;
9639     break;
9640   case Intrinsic::ppc_altivec_vcmpgtsb_p:
9641     CompareOpc = 774;
9642     isDot = true;
9643     break;
9644   case Intrinsic::ppc_altivec_vcmpgtsh_p:
9645     CompareOpc = 838;
9646     isDot = true;
9647     break;
9648   case Intrinsic::ppc_altivec_vcmpgtsw_p:
9649     CompareOpc = 902;
9650     isDot = true;
9651     break;
9652   case Intrinsic::ppc_altivec_vcmpgtsd_p:
9653     if (Subtarget.hasP8Altivec()) {
9654       CompareOpc = 967;
9655       isDot = true;
9656     } else
9657       return false;
9658     break;
9659   case Intrinsic::ppc_altivec_vcmpgtub_p:
9660     CompareOpc = 518;
9661     isDot = true;
9662     break;
9663   case Intrinsic::ppc_altivec_vcmpgtuh_p:
9664     CompareOpc = 582;
9665     isDot = true;
9666     break;
9667   case Intrinsic::ppc_altivec_vcmpgtuw_p:
9668     CompareOpc = 646;
9669     isDot = true;
9670     break;
9671   case Intrinsic::ppc_altivec_vcmpgtud_p:
9672     if (Subtarget.hasP8Altivec()) {
9673       CompareOpc = 711;
9674       isDot = true;
9675     } else
9676       return false;
9677     break;
9678 
9679   case Intrinsic::ppc_altivec_vcmpequq:
9680   case Intrinsic::ppc_altivec_vcmpgtsq:
9681   case Intrinsic::ppc_altivec_vcmpgtuq:
9682     if (!Subtarget.isISA3_1())
9683       return false;
9684     switch (IntrinsicID) {
9685     default:
9686       llvm_unreachable("Unknown comparison intrinsic.");
9687     case Intrinsic::ppc_altivec_vcmpequq:
9688       CompareOpc = 455;
9689       break;
9690     case Intrinsic::ppc_altivec_vcmpgtsq:
9691       CompareOpc = 903;
9692       break;
9693     case Intrinsic::ppc_altivec_vcmpgtuq:
9694       CompareOpc = 647;
9695       break;
9696     }
9697     break;
9698 
9699   // VSX predicate comparisons use the same infrastructure
9700   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9701   case Intrinsic::ppc_vsx_xvcmpgedp_p:
9702   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9703   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9704   case Intrinsic::ppc_vsx_xvcmpgesp_p:
9705   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9706     if (Subtarget.hasVSX()) {
9707       switch (IntrinsicID) {
9708       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9709         CompareOpc = 99;
9710         break;
9711       case Intrinsic::ppc_vsx_xvcmpgedp_p:
9712         CompareOpc = 115;
9713         break;
9714       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9715         CompareOpc = 107;
9716         break;
9717       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9718         CompareOpc = 67;
9719         break;
9720       case Intrinsic::ppc_vsx_xvcmpgesp_p:
9721         CompareOpc = 83;
9722         break;
9723       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9724         CompareOpc = 75;
9725         break;
9726       }
9727       isDot = true;
9728     } else
9729       return false;
9730     break;
9731 
9732   // Normal Comparisons.
9733   case Intrinsic::ppc_altivec_vcmpbfp:
9734     CompareOpc = 966;
9735     break;
9736   case Intrinsic::ppc_altivec_vcmpeqfp:
9737     CompareOpc = 198;
9738     break;
9739   case Intrinsic::ppc_altivec_vcmpequb:
9740     CompareOpc = 6;
9741     break;
9742   case Intrinsic::ppc_altivec_vcmpequh:
9743     CompareOpc = 70;
9744     break;
9745   case Intrinsic::ppc_altivec_vcmpequw:
9746     CompareOpc = 134;
9747     break;
9748   case Intrinsic::ppc_altivec_vcmpequd:
9749     if (Subtarget.hasP8Altivec())
9750       CompareOpc = 199;
9751     else
9752       return false;
9753     break;
9754   case Intrinsic::ppc_altivec_vcmpneb:
9755   case Intrinsic::ppc_altivec_vcmpneh:
9756   case Intrinsic::ppc_altivec_vcmpnew:
9757   case Intrinsic::ppc_altivec_vcmpnezb:
9758   case Intrinsic::ppc_altivec_vcmpnezh:
9759   case Intrinsic::ppc_altivec_vcmpnezw:
9760     if (Subtarget.hasP9Altivec())
9761       switch (IntrinsicID) {
9762       default:
9763         llvm_unreachable("Unknown comparison intrinsic.");
9764       case Intrinsic::ppc_altivec_vcmpneb:
9765         CompareOpc = 7;
9766         break;
9767       case Intrinsic::ppc_altivec_vcmpneh:
9768         CompareOpc = 71;
9769         break;
9770       case Intrinsic::ppc_altivec_vcmpnew:
9771         CompareOpc = 135;
9772         break;
9773       case Intrinsic::ppc_altivec_vcmpnezb:
9774         CompareOpc = 263;
9775         break;
9776       case Intrinsic::ppc_altivec_vcmpnezh:
9777         CompareOpc = 327;
9778         break;
9779       case Intrinsic::ppc_altivec_vcmpnezw:
9780         CompareOpc = 391;
9781         break;
9782       }
9783     else
9784       return false;
9785     break;
9786   case Intrinsic::ppc_altivec_vcmpgefp:
9787     CompareOpc = 454;
9788     break;
9789   case Intrinsic::ppc_altivec_vcmpgtfp:
9790     CompareOpc = 710;
9791     break;
9792   case Intrinsic::ppc_altivec_vcmpgtsb:
9793     CompareOpc = 774;
9794     break;
9795   case Intrinsic::ppc_altivec_vcmpgtsh:
9796     CompareOpc = 838;
9797     break;
9798   case Intrinsic::ppc_altivec_vcmpgtsw:
9799     CompareOpc = 902;
9800     break;
9801   case Intrinsic::ppc_altivec_vcmpgtsd:
9802     if (Subtarget.hasP8Altivec())
9803       CompareOpc = 967;
9804     else
9805       return false;
9806     break;
9807   case Intrinsic::ppc_altivec_vcmpgtub:
9808     CompareOpc = 518;
9809     break;
9810   case Intrinsic::ppc_altivec_vcmpgtuh:
9811     CompareOpc = 582;
9812     break;
9813   case Intrinsic::ppc_altivec_vcmpgtuw:
9814     CompareOpc = 646;
9815     break;
9816   case Intrinsic::ppc_altivec_vcmpgtud:
9817     if (Subtarget.hasP8Altivec())
9818       CompareOpc = 711;
9819     else
9820       return false;
9821     break;
9822   case Intrinsic::ppc_altivec_vcmpequq_p:
9823   case Intrinsic::ppc_altivec_vcmpgtsq_p:
9824   case Intrinsic::ppc_altivec_vcmpgtuq_p:
9825     if (!Subtarget.isISA3_1())
9826       return false;
9827     switch (IntrinsicID) {
9828     default:
9829       llvm_unreachable("Unknown comparison intrinsic.");
9830     case Intrinsic::ppc_altivec_vcmpequq_p:
9831       CompareOpc = 455;
9832       break;
9833     case Intrinsic::ppc_altivec_vcmpgtsq_p:
9834       CompareOpc = 903;
9835       break;
9836     case Intrinsic::ppc_altivec_vcmpgtuq_p:
9837       CompareOpc = 647;
9838       break;
9839     }
9840     isDot = true;
9841     break;
9842   }
9843   return true;
9844 }
9845 
9846 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9847 /// lower, do it, otherwise return null.
9848 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9849                                                    SelectionDAG &DAG) const {
9850   unsigned IntrinsicID =
9851     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9852 
9853   SDLoc dl(Op);
9854 
9855   switch (IntrinsicID) {
9856   case Intrinsic::thread_pointer:
9857     // Reads the thread pointer register, used for __builtin_thread_pointer.
9858     if (Subtarget.isPPC64())
9859       return DAG.getRegister(PPC::X13, MVT::i64);
9860     return DAG.getRegister(PPC::R2, MVT::i32);
9861 
9862   case Intrinsic::ppc_mma_disassemble_acc:
9863   case Intrinsic::ppc_vsx_disassemble_pair: {
9864     int NumVecs = 2;
9865     SDValue WideVec = Op.getOperand(1);
9866     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
9867       NumVecs = 4;
9868       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
9869     }
9870     SmallVector<SDValue, 4> RetOps;
9871     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
9872       SDValue Extract = DAG.getNode(
9873           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
9874           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
9875                                                      : VecNo,
9876                           dl, MVT::i64));
9877       RetOps.push_back(Extract);
9878     }
9879     return DAG.getMergeValues(RetOps, dl);
9880   }
9881   }
9882 
9883   // If this is a lowered altivec predicate compare, CompareOpc is set to the
9884   // opcode number of the comparison.
9885   int CompareOpc;
9886   bool isDot;
9887   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9888     return SDValue();    // Don't custom lower most intrinsics.
9889 
9890   // If this is a non-dot comparison, make the VCMP node and we are done.
9891   if (!isDot) {
9892     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9893                               Op.getOperand(1), Op.getOperand(2),
9894                               DAG.getConstant(CompareOpc, dl, MVT::i32));
9895     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9896   }
9897 
9898   // Create the PPCISD altivec 'dot' comparison node.
9899   SDValue Ops[] = {
9900     Op.getOperand(2),  // LHS
9901     Op.getOperand(3),  // RHS
9902     DAG.getConstant(CompareOpc, dl, MVT::i32)
9903   };
9904   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9905   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
9906 
9907   // Now that we have the comparison, emit a copy from the CR to a GPR.
9908   // This is flagged to the above dot comparison.
9909   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9910                                 DAG.getRegister(PPC::CR6, MVT::i32),
9911                                 CompNode.getValue(1));
9912 
9913   // Unpack the result based on how the target uses it.
9914   unsigned BitNo;   // Bit # of CR6.
9915   bool InvertBit;   // Invert result?
9916   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9917   default:  // Can't happen, don't crash on invalid number though.
9918   case 0:   // Return the value of the EQ bit of CR6.
9919     BitNo = 0; InvertBit = false;
9920     break;
9921   case 1:   // Return the inverted value of the EQ bit of CR6.
9922     BitNo = 0; InvertBit = true;
9923     break;
9924   case 2:   // Return the value of the LT bit of CR6.
9925     BitNo = 2; InvertBit = false;
9926     break;
9927   case 3:   // Return the inverted value of the LT bit of CR6.
9928     BitNo = 2; InvertBit = true;
9929     break;
9930   }
9931 
9932   // Shift the bit into the low position.
9933   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9934                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9935   // Isolate the bit.
9936   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9937                       DAG.getConstant(1, dl, MVT::i32));
9938 
9939   // If we are supposed to, toggle the bit.
9940   if (InvertBit)
9941     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9942                         DAG.getConstant(1, dl, MVT::i32));
9943   return Flags;
9944 }
9945 
9946 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9947                                                SelectionDAG &DAG) const {
9948   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9949   // the beginning of the argument list.
9950   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
9951   SDLoc DL(Op);
9952   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
9953   case Intrinsic::ppc_cfence: {
9954     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
9955     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
9956     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
9957                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
9958                                                   Op.getOperand(ArgStart + 1)),
9959                                       Op.getOperand(0)),
9960                    0);
9961   }
9962   default:
9963     break;
9964   }
9965   return SDValue();
9966 }
9967 
9968 // Lower scalar BSWAP64 to xxbrd.
9969 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
9970   SDLoc dl(Op);
9971   // MTVSRDD
9972   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
9973                    Op.getOperand(0));
9974   // XXBRD
9975   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
9976   // MFVSRD
9977   int VectorIndex = 0;
9978   if (Subtarget.isLittleEndian())
9979     VectorIndex = 1;
9980   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
9981                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
9982   return Op;
9983 }
9984 
9985 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9986 // compared to a value that is atomically loaded (atomic loads zero-extend).
9987 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
9988                                                 SelectionDAG &DAG) const {
9989   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
9990          "Expecting an atomic compare-and-swap here.");
9991   SDLoc dl(Op);
9992   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
9993   EVT MemVT = AtomicNode->getMemoryVT();
9994   if (MemVT.getSizeInBits() >= 32)
9995     return Op;
9996 
9997   SDValue CmpOp = Op.getOperand(2);
9998   // If this is already correctly zero-extended, leave it alone.
9999   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10000   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10001     return Op;
10002 
10003   // Clear the high bits of the compare operand.
10004   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10005   SDValue NewCmpOp =
10006     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10007                 DAG.getConstant(MaskVal, dl, MVT::i32));
10008 
10009   // Replace the existing compare operand with the properly zero-extended one.
10010   SmallVector<SDValue, 4> Ops;
10011   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10012     Ops.push_back(AtomicNode->getOperand(i));
10013   Ops[2] = NewCmpOp;
10014   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10015   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10016   auto NodeTy =
10017     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10018   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10019 }
10020 
10021 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10022                                                  SelectionDAG &DAG) const {
10023   SDLoc dl(Op);
10024   // Create a stack slot that is 16-byte aligned.
10025   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10026   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10027   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10028   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10029 
10030   // Store the input value into Value#0 of the stack slot.
10031   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10032                                MachinePointerInfo());
10033   // Load it out.
10034   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10035 }
10036 
10037 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10038                                                   SelectionDAG &DAG) const {
10039   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10040          "Should only be called for ISD::INSERT_VECTOR_ELT");
10041 
10042   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10043 
10044   EVT VT = Op.getValueType();
10045   SDLoc dl(Op);
10046   SDValue V1 = Op.getOperand(0);
10047   SDValue V2 = Op.getOperand(1);
10048   SDValue V3 = Op.getOperand(2);
10049 
10050   if (Subtarget.isISA3_1()) {
10051     // On P10, we have legal lowering for constant and variable indices for
10052     // integer vectors.
10053     if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10054         VT == MVT::v2i64)
10055       return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
10056     // For f32 and f64 vectors, we have legal lowering for variable indices.
10057     // For f32 we also have legal lowering when the element is loaded from
10058     // memory.
10059     if (VT == MVT::v4f32 || VT == MVT::v2f64) {
10060       if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2)))
10061         return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
10062       return SDValue();
10063     }
10064   }
10065 
10066   // Before P10, we have legal lowering for constant indices but not for
10067   // variable ones.
10068   if (!C)
10069     return SDValue();
10070 
10071   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10072   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10073     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10074     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10075     unsigned InsertAtElement = C->getZExtValue();
10076     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10077     if (Subtarget.isLittleEndian()) {
10078       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10079     }
10080     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10081                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10082   }
10083   return Op;
10084 }
10085 
10086 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10087                                            SelectionDAG &DAG) const {
10088   SDLoc dl(Op);
10089   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10090   SDValue LoadChain = LN->getChain();
10091   SDValue BasePtr = LN->getBasePtr();
10092   EVT VT = Op.getValueType();
10093 
10094   if (VT != MVT::v256i1 && VT != MVT::v512i1)
10095     return Op;
10096 
10097   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10098   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10099   // 2 or 4 vsx registers.
10100   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10101          "Type unsupported without MMA");
10102   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10103          "Type unsupported without paired vector support");
10104   Align Alignment = LN->getAlign();
10105   SmallVector<SDValue, 4> Loads;
10106   SmallVector<SDValue, 4> LoadChains;
10107   unsigned NumVecs = VT.getSizeInBits() / 128;
10108   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10109     SDValue Load =
10110         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10111                     LN->getPointerInfo().getWithOffset(Idx * 16),
10112                     commonAlignment(Alignment, Idx * 16),
10113                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
10114     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10115                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10116     Loads.push_back(Load);
10117     LoadChains.push_back(Load.getValue(1));
10118   }
10119   if (Subtarget.isLittleEndian()) {
10120     std::reverse(Loads.begin(), Loads.end());
10121     std::reverse(LoadChains.begin(), LoadChains.end());
10122   }
10123   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10124   SDValue Value =
10125       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10126                   dl, VT, Loads);
10127   SDValue RetOps[] = {Value, TF};
10128   return DAG.getMergeValues(RetOps, dl);
10129 }
10130 
10131 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10132                                             SelectionDAG &DAG) const {
10133   SDLoc dl(Op);
10134   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10135   SDValue StoreChain = SN->getChain();
10136   SDValue BasePtr = SN->getBasePtr();
10137   SDValue Value = SN->getValue();
10138   EVT StoreVT = Value.getValueType();
10139 
10140   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10141     return Op;
10142 
10143   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10144   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10145   // underlying registers individually.
10146   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10147          "Type unsupported without MMA");
10148   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10149          "Type unsupported without paired vector support");
10150   Align Alignment = SN->getAlign();
10151   SmallVector<SDValue, 4> Stores;
10152   unsigned NumVecs = 2;
10153   if (StoreVT == MVT::v512i1) {
10154     Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10155     NumVecs = 4;
10156   }
10157   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10158     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10159     SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10160                               DAG.getConstant(VecNum, dl, MVT::i64));
10161     SDValue Store =
10162         DAG.getStore(StoreChain, dl, Elt, BasePtr,
10163                      SN->getPointerInfo().getWithOffset(Idx * 16),
10164                      commonAlignment(Alignment, Idx * 16),
10165                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
10166     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10167                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10168     Stores.push_back(Store);
10169   }
10170   SDValue TF = DAG.getTokenFactor(dl, Stores);
10171   return TF;
10172 }
10173 
10174 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10175   SDLoc dl(Op);
10176   if (Op.getValueType() == MVT::v4i32) {
10177     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10178 
10179     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10180     // +16 as shift amt.
10181     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10182     SDValue RHSSwap =   // = vrlw RHS, 16
10183       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10184 
10185     // Shrinkify inputs to v8i16.
10186     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10187     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10188     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10189 
10190     // Low parts multiplied together, generating 32-bit results (we ignore the
10191     // top parts).
10192     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10193                                         LHS, RHS, DAG, dl, MVT::v4i32);
10194 
10195     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10196                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10197     // Shift the high parts up 16 bits.
10198     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10199                               Neg16, DAG, dl);
10200     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10201   } else if (Op.getValueType() == MVT::v16i8) {
10202     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10203     bool isLittleEndian = Subtarget.isLittleEndian();
10204 
10205     // Multiply the even 8-bit parts, producing 16-bit sums.
10206     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10207                                            LHS, RHS, DAG, dl, MVT::v8i16);
10208     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10209 
10210     // Multiply the odd 8-bit parts, producing 16-bit sums.
10211     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10212                                           LHS, RHS, DAG, dl, MVT::v8i16);
10213     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10214 
10215     // Merge the results together.  Because vmuleub and vmuloub are
10216     // instructions with a big-endian bias, we must reverse the
10217     // element numbering and reverse the meaning of "odd" and "even"
10218     // when generating little endian code.
10219     int Ops[16];
10220     for (unsigned i = 0; i != 8; ++i) {
10221       if (isLittleEndian) {
10222         Ops[i*2  ] = 2*i;
10223         Ops[i*2+1] = 2*i+16;
10224       } else {
10225         Ops[i*2  ] = 2*i+1;
10226         Ops[i*2+1] = 2*i+1+16;
10227       }
10228     }
10229     if (isLittleEndian)
10230       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10231     else
10232       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10233   } else {
10234     llvm_unreachable("Unknown mul to lower!");
10235   }
10236 }
10237 
10238 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10239   bool IsStrict = Op->isStrictFPOpcode();
10240   if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10241       !Subtarget.hasP9Vector())
10242     return SDValue();
10243 
10244   return Op;
10245 }
10246 
10247 // Custom lowering for fpext vf32 to v2f64
10248 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10249 
10250   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10251          "Should only be called for ISD::FP_EXTEND");
10252 
10253   // FIXME: handle extends from half precision float vectors on P9.
10254   // We only want to custom lower an extend from v2f32 to v2f64.
10255   if (Op.getValueType() != MVT::v2f64 ||
10256       Op.getOperand(0).getValueType() != MVT::v2f32)
10257     return SDValue();
10258 
10259   SDLoc dl(Op);
10260   SDValue Op0 = Op.getOperand(0);
10261 
10262   switch (Op0.getOpcode()) {
10263   default:
10264     return SDValue();
10265   case ISD::EXTRACT_SUBVECTOR: {
10266     assert(Op0.getNumOperands() == 2 &&
10267            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10268            "Node should have 2 operands with second one being a constant!");
10269 
10270     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10271       return SDValue();
10272 
10273     // Custom lower is only done for high or low doubleword.
10274     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10275     if (Idx % 2 != 0)
10276       return SDValue();
10277 
10278     // Since input is v4f32, at this point Idx is either 0 or 2.
10279     // Shift to get the doubleword position we want.
10280     int DWord = Idx >> 1;
10281 
10282     // High and low word positions are different on little endian.
10283     if (Subtarget.isLittleEndian())
10284       DWord ^= 0x1;
10285 
10286     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10287                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10288   }
10289   case ISD::FADD:
10290   case ISD::FMUL:
10291   case ISD::FSUB: {
10292     SDValue NewLoad[2];
10293     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10294       // Ensure both input are loads.
10295       SDValue LdOp = Op0.getOperand(i);
10296       if (LdOp.getOpcode() != ISD::LOAD)
10297         return SDValue();
10298       // Generate new load node.
10299       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10300       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10301       NewLoad[i] = DAG.getMemIntrinsicNode(
10302           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10303           LD->getMemoryVT(), LD->getMemOperand());
10304     }
10305     SDValue NewOp =
10306         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10307                     NewLoad[1], Op0.getNode()->getFlags());
10308     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10309                        DAG.getConstant(0, dl, MVT::i32));
10310   }
10311   case ISD::LOAD: {
10312     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10313     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10314     SDValue NewLd = DAG.getMemIntrinsicNode(
10315         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10316         LD->getMemoryVT(), LD->getMemOperand());
10317     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10318                        DAG.getConstant(0, dl, MVT::i32));
10319   }
10320   }
10321   llvm_unreachable("ERROR:Should return for all cases within swtich.");
10322 }
10323 
10324 /// LowerOperation - Provide custom lowering hooks for some operations.
10325 ///
10326 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10327   switch (Op.getOpcode()) {
10328   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10329   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10330   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10331   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10332   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10333   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10334   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10335   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10336   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10337 
10338   // Variable argument lowering.
10339   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10340   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10341   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10342 
10343   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
10344   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10345   case ISD::GET_DYNAMIC_AREA_OFFSET:
10346     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10347 
10348   // Exception handling lowering.
10349   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
10350   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
10351   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
10352 
10353   case ISD::LOAD:               return LowerLOAD(Op, DAG);
10354   case ISD::STORE:              return LowerSTORE(Op, DAG);
10355   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
10356   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
10357   case ISD::STRICT_FP_TO_UINT:
10358   case ISD::STRICT_FP_TO_SINT:
10359   case ISD::FP_TO_UINT:
10360   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10361   case ISD::STRICT_UINT_TO_FP:
10362   case ISD::STRICT_SINT_TO_FP:
10363   case ISD::UINT_TO_FP:
10364   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
10365   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
10366 
10367   // Lower 64-bit shifts.
10368   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
10369   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
10370   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
10371 
10372   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
10373   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
10374 
10375   // Vector-related lowering.
10376   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
10377   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
10378   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10379   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
10380   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
10381   case ISD::MUL:                return LowerMUL(Op, DAG);
10382   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
10383   case ISD::STRICT_FP_ROUND:
10384   case ISD::FP_ROUND:
10385     return LowerFP_ROUND(Op, DAG);
10386   case ISD::ROTL:               return LowerROTL(Op, DAG);
10387 
10388   // For counter-based loop handling.
10389   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
10390 
10391   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
10392 
10393   // Frame & Return address.
10394   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
10395   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
10396 
10397   case ISD::INTRINSIC_VOID:
10398     return LowerINTRINSIC_VOID(Op, DAG);
10399   case ISD::BSWAP:
10400     return LowerBSWAP(Op, DAG);
10401   case ISD::ATOMIC_CMP_SWAP:
10402     return LowerATOMIC_CMP_SWAP(Op, DAG);
10403   }
10404 }
10405 
10406 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10407                                            SmallVectorImpl<SDValue>&Results,
10408                                            SelectionDAG &DAG) const {
10409   SDLoc dl(N);
10410   switch (N->getOpcode()) {
10411   default:
10412     llvm_unreachable("Do not know how to custom type legalize this operation!");
10413   case ISD::READCYCLECOUNTER: {
10414     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10415     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10416 
10417     Results.push_back(
10418         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
10419     Results.push_back(RTB.getValue(2));
10420     break;
10421   }
10422   case ISD::INTRINSIC_W_CHAIN: {
10423     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10424         Intrinsic::loop_decrement)
10425       break;
10426 
10427     assert(N->getValueType(0) == MVT::i1 &&
10428            "Unexpected result type for CTR decrement intrinsic");
10429     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10430                                  N->getValueType(0));
10431     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10432     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10433                                  N->getOperand(1));
10434 
10435     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10436     Results.push_back(NewInt.getValue(1));
10437     break;
10438   }
10439   case ISD::VAARG: {
10440     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10441       return;
10442 
10443     EVT VT = N->getValueType(0);
10444 
10445     if (VT == MVT::i64) {
10446       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10447 
10448       Results.push_back(NewNode);
10449       Results.push_back(NewNode.getValue(1));
10450     }
10451     return;
10452   }
10453   case ISD::STRICT_FP_TO_SINT:
10454   case ISD::STRICT_FP_TO_UINT:
10455   case ISD::FP_TO_SINT:
10456   case ISD::FP_TO_UINT:
10457     // LowerFP_TO_INT() can only handle f32 and f64.
10458     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10459         MVT::ppcf128)
10460       return;
10461     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10462     return;
10463   case ISD::TRUNCATE: {
10464     if (!N->getValueType(0).isVector())
10465       return;
10466     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
10467     if (Lowered)
10468       Results.push_back(Lowered);
10469     return;
10470   }
10471   case ISD::FSHL:
10472   case ISD::FSHR:
10473     // Don't handle funnel shifts here.
10474     return;
10475   case ISD::BITCAST:
10476     // Don't handle bitcast here.
10477     return;
10478   case ISD::FP_EXTEND:
10479     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10480     if (Lowered)
10481       Results.push_back(Lowered);
10482     return;
10483   }
10484 }
10485 
10486 //===----------------------------------------------------------------------===//
10487 //  Other Lowering Code
10488 //===----------------------------------------------------------------------===//
10489 
10490 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10491   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10492   Function *Func = Intrinsic::getDeclaration(M, Id);
10493   return Builder.CreateCall(Func, {});
10494 }
10495 
10496 // The mappings for emitLeading/TrailingFence is taken from
10497 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10498 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10499                                                  Instruction *Inst,
10500                                                  AtomicOrdering Ord) const {
10501   if (Ord == AtomicOrdering::SequentiallyConsistent)
10502     return callIntrinsic(Builder, Intrinsic::ppc_sync);
10503   if (isReleaseOrStronger(Ord))
10504     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10505   return nullptr;
10506 }
10507 
10508 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10509                                                   Instruction *Inst,
10510                                                   AtomicOrdering Ord) const {
10511   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10512     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10513     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10514     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10515     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10516       return Builder.CreateCall(
10517           Intrinsic::getDeclaration(
10518               Builder.GetInsertBlock()->getParent()->getParent(),
10519               Intrinsic::ppc_cfence, {Inst->getType()}),
10520           {Inst});
10521     // FIXME: Can use isync for rmw operation.
10522     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10523   }
10524   return nullptr;
10525 }
10526 
10527 MachineBasicBlock *
10528 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10529                                     unsigned AtomicSize,
10530                                     unsigned BinOpcode,
10531                                     unsigned CmpOpcode,
10532                                     unsigned CmpPred) const {
10533   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10534   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10535 
10536   auto LoadMnemonic = PPC::LDARX;
10537   auto StoreMnemonic = PPC::STDCX;
10538   switch (AtomicSize) {
10539   default:
10540     llvm_unreachable("Unexpected size of atomic entity");
10541   case 1:
10542     LoadMnemonic = PPC::LBARX;
10543     StoreMnemonic = PPC::STBCX;
10544     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10545     break;
10546   case 2:
10547     LoadMnemonic = PPC::LHARX;
10548     StoreMnemonic = PPC::STHCX;
10549     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10550     break;
10551   case 4:
10552     LoadMnemonic = PPC::LWARX;
10553     StoreMnemonic = PPC::STWCX;
10554     break;
10555   case 8:
10556     LoadMnemonic = PPC::LDARX;
10557     StoreMnemonic = PPC::STDCX;
10558     break;
10559   }
10560 
10561   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10562   MachineFunction *F = BB->getParent();
10563   MachineFunction::iterator It = ++BB->getIterator();
10564 
10565   Register dest = MI.getOperand(0).getReg();
10566   Register ptrA = MI.getOperand(1).getReg();
10567   Register ptrB = MI.getOperand(2).getReg();
10568   Register incr = MI.getOperand(3).getReg();
10569   DebugLoc dl = MI.getDebugLoc();
10570 
10571   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10572   MachineBasicBlock *loop2MBB =
10573     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10574   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10575   F->insert(It, loopMBB);
10576   if (CmpOpcode)
10577     F->insert(It, loop2MBB);
10578   F->insert(It, exitMBB);
10579   exitMBB->splice(exitMBB->begin(), BB,
10580                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
10581   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10582 
10583   MachineRegisterInfo &RegInfo = F->getRegInfo();
10584   Register TmpReg = (!BinOpcode) ? incr :
10585     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10586                                            : &PPC::GPRCRegClass);
10587 
10588   //  thisMBB:
10589   //   ...
10590   //   fallthrough --> loopMBB
10591   BB->addSuccessor(loopMBB);
10592 
10593   //  loopMBB:
10594   //   l[wd]arx dest, ptr
10595   //   add r0, dest, incr
10596   //   st[wd]cx. r0, ptr
10597   //   bne- loopMBB
10598   //   fallthrough --> exitMBB
10599 
10600   // For max/min...
10601   //  loopMBB:
10602   //   l[wd]arx dest, ptr
10603   //   cmpl?[wd] incr, dest
10604   //   bgt exitMBB
10605   //  loop2MBB:
10606   //   st[wd]cx. dest, ptr
10607   //   bne- loopMBB
10608   //   fallthrough --> exitMBB
10609 
10610   BB = loopMBB;
10611   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10612     .addReg(ptrA).addReg(ptrB);
10613   if (BinOpcode)
10614     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10615   if (CmpOpcode) {
10616     // Signed comparisons of byte or halfword values must be sign-extended.
10617     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10618       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10619       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10620               ExtReg).addReg(dest);
10621       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10622         .addReg(incr).addReg(ExtReg);
10623     } else
10624       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10625         .addReg(incr).addReg(dest);
10626 
10627     BuildMI(BB, dl, TII->get(PPC::BCC))
10628       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10629     BB->addSuccessor(loop2MBB);
10630     BB->addSuccessor(exitMBB);
10631     BB = loop2MBB;
10632   }
10633   BuildMI(BB, dl, TII->get(StoreMnemonic))
10634     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10635   BuildMI(BB, dl, TII->get(PPC::BCC))
10636     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10637   BB->addSuccessor(loopMBB);
10638   BB->addSuccessor(exitMBB);
10639 
10640   //  exitMBB:
10641   //   ...
10642   BB = exitMBB;
10643   return BB;
10644 }
10645 
10646 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
10647   switch(MI.getOpcode()) {
10648   default:
10649     return false;
10650   case PPC::COPY:
10651     return TII->isSignExtended(MI);
10652   case PPC::LHA:
10653   case PPC::LHA8:
10654   case PPC::LHAU:
10655   case PPC::LHAU8:
10656   case PPC::LHAUX:
10657   case PPC::LHAUX8:
10658   case PPC::LHAX:
10659   case PPC::LHAX8:
10660   case PPC::LWA:
10661   case PPC::LWAUX:
10662   case PPC::LWAX:
10663   case PPC::LWAX_32:
10664   case PPC::LWA_32:
10665   case PPC::PLHA:
10666   case PPC::PLHA8:
10667   case PPC::PLHA8pc:
10668   case PPC::PLHApc:
10669   case PPC::PLWA:
10670   case PPC::PLWA8:
10671   case PPC::PLWA8pc:
10672   case PPC::PLWApc:
10673   case PPC::EXTSB:
10674   case PPC::EXTSB8:
10675   case PPC::EXTSB8_32_64:
10676   case PPC::EXTSB8_rec:
10677   case PPC::EXTSB_rec:
10678   case PPC::EXTSH:
10679   case PPC::EXTSH8:
10680   case PPC::EXTSH8_32_64:
10681   case PPC::EXTSH8_rec:
10682   case PPC::EXTSH_rec:
10683   case PPC::EXTSW:
10684   case PPC::EXTSWSLI:
10685   case PPC::EXTSWSLI_32_64:
10686   case PPC::EXTSWSLI_32_64_rec:
10687   case PPC::EXTSWSLI_rec:
10688   case PPC::EXTSW_32:
10689   case PPC::EXTSW_32_64:
10690   case PPC::EXTSW_32_64_rec:
10691   case PPC::EXTSW_rec:
10692   case PPC::SRAW:
10693   case PPC::SRAWI:
10694   case PPC::SRAWI_rec:
10695   case PPC::SRAW_rec:
10696     return true;
10697   }
10698   return false;
10699 }
10700 
10701 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10702     MachineInstr &MI, MachineBasicBlock *BB,
10703     bool is8bit, // operation
10704     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10705   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10706   const PPCInstrInfo *TII = Subtarget.getInstrInfo();
10707 
10708   // If this is a signed comparison and the value being compared is not known
10709   // to be sign extended, sign extend it here.
10710   DebugLoc dl = MI.getDebugLoc();
10711   MachineFunction *F = BB->getParent();
10712   MachineRegisterInfo &RegInfo = F->getRegInfo();
10713   Register incr = MI.getOperand(3).getReg();
10714   bool IsSignExtended = Register::isVirtualRegister(incr) &&
10715     isSignExtended(*RegInfo.getVRegDef(incr), TII);
10716 
10717   if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
10718     Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10719     BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
10720         .addReg(MI.getOperand(3).getReg());
10721     MI.getOperand(3).setReg(ValueReg);
10722   }
10723   // If we support part-word atomic mnemonics, just use them
10724   if (Subtarget.hasPartwordAtomics())
10725     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10726                             CmpPred);
10727 
10728   // In 64 bit mode we have to use 64 bits for addresses, even though the
10729   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
10730   // registers without caring whether they're 32 or 64, but here we're
10731   // doing actual arithmetic on the addresses.
10732   bool is64bit = Subtarget.isPPC64();
10733   bool isLittleEndian = Subtarget.isLittleEndian();
10734   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10735 
10736   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10737   MachineFunction::iterator It = ++BB->getIterator();
10738 
10739   Register dest = MI.getOperand(0).getReg();
10740   Register ptrA = MI.getOperand(1).getReg();
10741   Register ptrB = MI.getOperand(2).getReg();
10742 
10743   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10744   MachineBasicBlock *loop2MBB =
10745       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10746   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10747   F->insert(It, loopMBB);
10748   if (CmpOpcode)
10749     F->insert(It, loop2MBB);
10750   F->insert(It, exitMBB);
10751   exitMBB->splice(exitMBB->begin(), BB,
10752                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
10753   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10754 
10755   const TargetRegisterClass *RC =
10756       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10757   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10758 
10759   Register PtrReg = RegInfo.createVirtualRegister(RC);
10760   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10761   Register ShiftReg =
10762       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10763   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10764   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10765   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10766   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10767   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10768   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10769   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10770   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10771   Register Ptr1Reg;
10772   Register TmpReg =
10773       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10774 
10775   //  thisMBB:
10776   //   ...
10777   //   fallthrough --> loopMBB
10778   BB->addSuccessor(loopMBB);
10779 
10780   // The 4-byte load must be aligned, while a char or short may be
10781   // anywhere in the word.  Hence all this nasty bookkeeping code.
10782   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10783   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10784   //   xori shift, shift1, 24 [16]
10785   //   rlwinm ptr, ptr1, 0, 0, 29
10786   //   slw incr2, incr, shift
10787   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10788   //   slw mask, mask2, shift
10789   //  loopMBB:
10790   //   lwarx tmpDest, ptr
10791   //   add tmp, tmpDest, incr2
10792   //   andc tmp2, tmpDest, mask
10793   //   and tmp3, tmp, mask
10794   //   or tmp4, tmp3, tmp2
10795   //   stwcx. tmp4, ptr
10796   //   bne- loopMBB
10797   //   fallthrough --> exitMBB
10798   //   srw dest, tmpDest, shift
10799   if (ptrA != ZeroReg) {
10800     Ptr1Reg = RegInfo.createVirtualRegister(RC);
10801     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10802         .addReg(ptrA)
10803         .addReg(ptrB);
10804   } else {
10805     Ptr1Reg = ptrB;
10806   }
10807   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
10808   // mode.
10809   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10810       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10811       .addImm(3)
10812       .addImm(27)
10813       .addImm(is8bit ? 28 : 27);
10814   if (!isLittleEndian)
10815     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10816         .addReg(Shift1Reg)
10817         .addImm(is8bit ? 24 : 16);
10818   if (is64bit)
10819     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10820         .addReg(Ptr1Reg)
10821         .addImm(0)
10822         .addImm(61);
10823   else
10824     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10825         .addReg(Ptr1Reg)
10826         .addImm(0)
10827         .addImm(0)
10828         .addImm(29);
10829   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10830   if (is8bit)
10831     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10832   else {
10833     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10834     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10835         .addReg(Mask3Reg)
10836         .addImm(65535);
10837   }
10838   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10839       .addReg(Mask2Reg)
10840       .addReg(ShiftReg);
10841 
10842   BB = loopMBB;
10843   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10844       .addReg(ZeroReg)
10845       .addReg(PtrReg);
10846   if (BinOpcode)
10847     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10848         .addReg(Incr2Reg)
10849         .addReg(TmpDestReg);
10850   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10851       .addReg(TmpDestReg)
10852       .addReg(MaskReg);
10853   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10854   if (CmpOpcode) {
10855     // For unsigned comparisons, we can directly compare the shifted values.
10856     // For signed comparisons we shift and sign extend.
10857     Register SReg = RegInfo.createVirtualRegister(GPRC);
10858     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10859         .addReg(TmpDestReg)
10860         .addReg(MaskReg);
10861     unsigned ValueReg = SReg;
10862     unsigned CmpReg = Incr2Reg;
10863     if (CmpOpcode == PPC::CMPW) {
10864       ValueReg = RegInfo.createVirtualRegister(GPRC);
10865       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10866           .addReg(SReg)
10867           .addReg(ShiftReg);
10868       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10869       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10870           .addReg(ValueReg);
10871       ValueReg = ValueSReg;
10872       CmpReg = incr;
10873     }
10874     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10875         .addReg(CmpReg)
10876         .addReg(ValueReg);
10877     BuildMI(BB, dl, TII->get(PPC::BCC))
10878         .addImm(CmpPred)
10879         .addReg(PPC::CR0)
10880         .addMBB(exitMBB);
10881     BB->addSuccessor(loop2MBB);
10882     BB->addSuccessor(exitMBB);
10883     BB = loop2MBB;
10884   }
10885   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10886   BuildMI(BB, dl, TII->get(PPC::STWCX))
10887       .addReg(Tmp4Reg)
10888       .addReg(ZeroReg)
10889       .addReg(PtrReg);
10890   BuildMI(BB, dl, TII->get(PPC::BCC))
10891       .addImm(PPC::PRED_NE)
10892       .addReg(PPC::CR0)
10893       .addMBB(loopMBB);
10894   BB->addSuccessor(loopMBB);
10895   BB->addSuccessor(exitMBB);
10896 
10897   //  exitMBB:
10898   //   ...
10899   BB = exitMBB;
10900   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10901       .addReg(TmpDestReg)
10902       .addReg(ShiftReg);
10903   return BB;
10904 }
10905 
10906 llvm::MachineBasicBlock *
10907 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10908                                     MachineBasicBlock *MBB) const {
10909   DebugLoc DL = MI.getDebugLoc();
10910   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10911   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10912 
10913   MachineFunction *MF = MBB->getParent();
10914   MachineRegisterInfo &MRI = MF->getRegInfo();
10915 
10916   const BasicBlock *BB = MBB->getBasicBlock();
10917   MachineFunction::iterator I = ++MBB->getIterator();
10918 
10919   Register DstReg = MI.getOperand(0).getReg();
10920   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10921   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10922   Register mainDstReg = MRI.createVirtualRegister(RC);
10923   Register restoreDstReg = MRI.createVirtualRegister(RC);
10924 
10925   MVT PVT = getPointerTy(MF->getDataLayout());
10926   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10927          "Invalid Pointer Size!");
10928   // For v = setjmp(buf), we generate
10929   //
10930   // thisMBB:
10931   //  SjLjSetup mainMBB
10932   //  bl mainMBB
10933   //  v_restore = 1
10934   //  b sinkMBB
10935   //
10936   // mainMBB:
10937   //  buf[LabelOffset] = LR
10938   //  v_main = 0
10939   //
10940   // sinkMBB:
10941   //  v = phi(main, restore)
10942   //
10943 
10944   MachineBasicBlock *thisMBB = MBB;
10945   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10946   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10947   MF->insert(I, mainMBB);
10948   MF->insert(I, sinkMBB);
10949 
10950   MachineInstrBuilder MIB;
10951 
10952   // Transfer the remainder of BB and its successor edges to sinkMBB.
10953   sinkMBB->splice(sinkMBB->begin(), MBB,
10954                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10955   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10956 
10957   // Note that the structure of the jmp_buf used here is not compatible
10958   // with that used by libc, and is not designed to be. Specifically, it
10959   // stores only those 'reserved' registers that LLVM does not otherwise
10960   // understand how to spill. Also, by convention, by the time this
10961   // intrinsic is called, Clang has already stored the frame address in the
10962   // first slot of the buffer and stack address in the third. Following the
10963   // X86 target code, we'll store the jump address in the second slot. We also
10964   // need to save the TOC pointer (R2) to handle jumps between shared
10965   // libraries, and that will be stored in the fourth slot. The thread
10966   // identifier (R13) is not affected.
10967 
10968   // thisMBB:
10969   const int64_t LabelOffset = 1 * PVT.getStoreSize();
10970   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
10971   const int64_t BPOffset    = 4 * PVT.getStoreSize();
10972 
10973   // Prepare IP either in reg.
10974   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
10975   Register LabelReg = MRI.createVirtualRegister(PtrRC);
10976   Register BufReg = MI.getOperand(1).getReg();
10977 
10978   if (Subtarget.is64BitELFABI()) {
10979     setUsesTOCBasePtr(*MBB->getParent());
10980     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
10981               .addReg(PPC::X2)
10982               .addImm(TOCOffset)
10983               .addReg(BufReg)
10984               .cloneMemRefs(MI);
10985   }
10986 
10987   // Naked functions never have a base pointer, and so we use r1. For all
10988   // other functions, this decision must be delayed until during PEI.
10989   unsigned BaseReg;
10990   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
10991     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
10992   else
10993     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
10994 
10995   MIB = BuildMI(*thisMBB, MI, DL,
10996                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
10997             .addReg(BaseReg)
10998             .addImm(BPOffset)
10999             .addReg(BufReg)
11000             .cloneMemRefs(MI);
11001 
11002   // Setup
11003   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11004   MIB.addRegMask(TRI->getNoPreservedMask());
11005 
11006   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11007 
11008   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11009           .addMBB(mainMBB);
11010   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11011 
11012   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11013   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11014 
11015   // mainMBB:
11016   //  mainDstReg = 0
11017   MIB =
11018       BuildMI(mainMBB, DL,
11019               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11020 
11021   // Store IP
11022   if (Subtarget.isPPC64()) {
11023     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11024             .addReg(LabelReg)
11025             .addImm(LabelOffset)
11026             .addReg(BufReg);
11027   } else {
11028     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11029             .addReg(LabelReg)
11030             .addImm(LabelOffset)
11031             .addReg(BufReg);
11032   }
11033   MIB.cloneMemRefs(MI);
11034 
11035   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11036   mainMBB->addSuccessor(sinkMBB);
11037 
11038   // sinkMBB:
11039   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11040           TII->get(PPC::PHI), DstReg)
11041     .addReg(mainDstReg).addMBB(mainMBB)
11042     .addReg(restoreDstReg).addMBB(thisMBB);
11043 
11044   MI.eraseFromParent();
11045   return sinkMBB;
11046 }
11047 
11048 MachineBasicBlock *
11049 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11050                                      MachineBasicBlock *MBB) const {
11051   DebugLoc DL = MI.getDebugLoc();
11052   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11053 
11054   MachineFunction *MF = MBB->getParent();
11055   MachineRegisterInfo &MRI = MF->getRegInfo();
11056 
11057   MVT PVT = getPointerTy(MF->getDataLayout());
11058   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11059          "Invalid Pointer Size!");
11060 
11061   const TargetRegisterClass *RC =
11062     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11063   Register Tmp = MRI.createVirtualRegister(RC);
11064   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11065   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11066   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11067   unsigned BP =
11068       (PVT == MVT::i64)
11069           ? PPC::X30
11070           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11071                                                               : PPC::R30);
11072 
11073   MachineInstrBuilder MIB;
11074 
11075   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11076   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11077   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11078   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11079 
11080   Register BufReg = MI.getOperand(0).getReg();
11081 
11082   // Reload FP (the jumped-to function may not have had a
11083   // frame pointer, and if so, then its r31 will be restored
11084   // as necessary).
11085   if (PVT == MVT::i64) {
11086     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11087             .addImm(0)
11088             .addReg(BufReg);
11089   } else {
11090     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11091             .addImm(0)
11092             .addReg(BufReg);
11093   }
11094   MIB.cloneMemRefs(MI);
11095 
11096   // Reload IP
11097   if (PVT == MVT::i64) {
11098     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11099             .addImm(LabelOffset)
11100             .addReg(BufReg);
11101   } else {
11102     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11103             .addImm(LabelOffset)
11104             .addReg(BufReg);
11105   }
11106   MIB.cloneMemRefs(MI);
11107 
11108   // Reload SP
11109   if (PVT == MVT::i64) {
11110     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11111             .addImm(SPOffset)
11112             .addReg(BufReg);
11113   } else {
11114     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11115             .addImm(SPOffset)
11116             .addReg(BufReg);
11117   }
11118   MIB.cloneMemRefs(MI);
11119 
11120   // Reload BP
11121   if (PVT == MVT::i64) {
11122     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11123             .addImm(BPOffset)
11124             .addReg(BufReg);
11125   } else {
11126     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11127             .addImm(BPOffset)
11128             .addReg(BufReg);
11129   }
11130   MIB.cloneMemRefs(MI);
11131 
11132   // Reload TOC
11133   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11134     setUsesTOCBasePtr(*MBB->getParent());
11135     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11136               .addImm(TOCOffset)
11137               .addReg(BufReg)
11138               .cloneMemRefs(MI);
11139   }
11140 
11141   // Jump
11142   BuildMI(*MBB, MI, DL,
11143           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11144   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11145 
11146   MI.eraseFromParent();
11147   return MBB;
11148 }
11149 
11150 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11151   // If the function specifically requests inline stack probes, emit them.
11152   if (MF.getFunction().hasFnAttribute("probe-stack"))
11153     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11154            "inline-asm";
11155   return false;
11156 }
11157 
11158 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11159   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11160   unsigned StackAlign = TFI->getStackAlignment();
11161   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11162          "Unexpected stack alignment");
11163   // The default stack probe size is 4096 if the function has no
11164   // stack-probe-size attribute.
11165   unsigned StackProbeSize = 4096;
11166   const Function &Fn = MF.getFunction();
11167   if (Fn.hasFnAttribute("stack-probe-size"))
11168     Fn.getFnAttribute("stack-probe-size")
11169         .getValueAsString()
11170         .getAsInteger(0, StackProbeSize);
11171   // Round down to the stack alignment.
11172   StackProbeSize &= ~(StackAlign - 1);
11173   return StackProbeSize ? StackProbeSize : StackAlign;
11174 }
11175 
11176 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
11177 // into three phases. In the first phase, it uses pseudo instruction
11178 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11179 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11180 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
11181 // MaxCallFrameSize so that it can calculate correct data area pointer.
11182 MachineBasicBlock *
11183 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11184                                     MachineBasicBlock *MBB) const {
11185   const bool isPPC64 = Subtarget.isPPC64();
11186   MachineFunction *MF = MBB->getParent();
11187   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11188   DebugLoc DL = MI.getDebugLoc();
11189   const unsigned ProbeSize = getStackProbeSize(*MF);
11190   const BasicBlock *ProbedBB = MBB->getBasicBlock();
11191   MachineRegisterInfo &MRI = MF->getRegInfo();
11192   // The CFG of probing stack looks as
11193   //         +-----+
11194   //         | MBB |
11195   //         +--+--+
11196   //            |
11197   //       +----v----+
11198   //  +--->+ TestMBB +---+
11199   //  |    +----+----+   |
11200   //  |         |        |
11201   //  |   +-----v----+   |
11202   //  +---+ BlockMBB |   |
11203   //      +----------+   |
11204   //                     |
11205   //       +---------+   |
11206   //       | TailMBB +<--+
11207   //       +---------+
11208   // In MBB, calculate previous frame pointer and final stack pointer.
11209   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11210   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11211   // TailMBB is spliced via \p MI.
11212   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11213   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11214   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11215 
11216   MachineFunction::iterator MBBIter = ++MBB->getIterator();
11217   MF->insert(MBBIter, TestMBB);
11218   MF->insert(MBBIter, BlockMBB);
11219   MF->insert(MBBIter, TailMBB);
11220 
11221   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11222   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11223 
11224   Register DstReg = MI.getOperand(0).getReg();
11225   Register NegSizeReg = MI.getOperand(1).getReg();
11226   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11227   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11228   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11229   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11230 
11231   // Since value of NegSizeReg might be realigned in prologepilog, insert a
11232   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
11233   // NegSize.
11234   unsigned ProbeOpc;
11235   if (!MRI.hasOneNonDBGUse(NegSizeReg))
11236     ProbeOpc =
11237         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11238   else
11239     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
11240     // and NegSizeReg will be allocated in the same phyreg to avoid
11241     // redundant copy when NegSizeReg has only one use which is current MI and
11242     // will be replaced by PREPARE_PROBED_ALLOCA then.
11243     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11244                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11245   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11246       .addDef(ActualNegSizeReg)
11247       .addReg(NegSizeReg)
11248       .add(MI.getOperand(2))
11249       .add(MI.getOperand(3));
11250 
11251   // Calculate final stack pointer, which equals to SP + ActualNegSize.
11252   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11253           FinalStackPtr)
11254       .addReg(SPReg)
11255       .addReg(ActualNegSizeReg);
11256 
11257   // Materialize a scratch register for update.
11258   int64_t NegProbeSize = -(int64_t)ProbeSize;
11259   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11260   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11261   if (!isInt<16>(NegProbeSize)) {
11262     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11263     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11264         .addImm(NegProbeSize >> 16);
11265     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11266             ScratchReg)
11267         .addReg(TempReg)
11268         .addImm(NegProbeSize & 0xFFFF);
11269   } else
11270     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11271         .addImm(NegProbeSize);
11272 
11273   {
11274     // Probing leading residual part.
11275     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11276     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11277         .addReg(ActualNegSizeReg)
11278         .addReg(ScratchReg);
11279     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11280     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11281         .addReg(Div)
11282         .addReg(ScratchReg);
11283     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11284     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11285         .addReg(Mul)
11286         .addReg(ActualNegSizeReg);
11287     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11288         .addReg(FramePointer)
11289         .addReg(SPReg)
11290         .addReg(NegMod);
11291   }
11292 
11293   {
11294     // Remaining part should be multiple of ProbeSize.
11295     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11296     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11297         .addReg(SPReg)
11298         .addReg(FinalStackPtr);
11299     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11300         .addImm(PPC::PRED_EQ)
11301         .addReg(CmpResult)
11302         .addMBB(TailMBB);
11303     TestMBB->addSuccessor(BlockMBB);
11304     TestMBB->addSuccessor(TailMBB);
11305   }
11306 
11307   {
11308     // Touch the block.
11309     // |P...|P...|P...
11310     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11311         .addReg(FramePointer)
11312         .addReg(SPReg)
11313         .addReg(ScratchReg);
11314     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11315     BlockMBB->addSuccessor(TestMBB);
11316   }
11317 
11318   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
11319   // DYNAREAOFFSET pseudo instruction to get the future result.
11320   Register MaxCallFrameSizeReg =
11321       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11322   BuildMI(TailMBB, DL,
11323           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11324           MaxCallFrameSizeReg)
11325       .add(MI.getOperand(2))
11326       .add(MI.getOperand(3));
11327   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11328       .addReg(SPReg)
11329       .addReg(MaxCallFrameSizeReg);
11330 
11331   // Splice instructions after MI to TailMBB.
11332   TailMBB->splice(TailMBB->end(), MBB,
11333                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11334   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11335   MBB->addSuccessor(TestMBB);
11336 
11337   // Delete the pseudo instruction.
11338   MI.eraseFromParent();
11339 
11340   ++NumDynamicAllocaProbed;
11341   return TailMBB;
11342 }
11343 
11344 MachineBasicBlock *
11345 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11346                                                MachineBasicBlock *BB) const {
11347   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11348       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11349     if (Subtarget.is64BitELFABI() &&
11350         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11351         !Subtarget.isUsingPCRelativeCalls()) {
11352       // Call lowering should have added an r2 operand to indicate a dependence
11353       // on the TOC base pointer value. It can't however, because there is no
11354       // way to mark the dependence as implicit there, and so the stackmap code
11355       // will confuse it with a regular operand. Instead, add the dependence
11356       // here.
11357       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11358     }
11359 
11360     return emitPatchPoint(MI, BB);
11361   }
11362 
11363   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11364       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11365     return emitEHSjLjSetJmp(MI, BB);
11366   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11367              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11368     return emitEHSjLjLongJmp(MI, BB);
11369   }
11370 
11371   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11372 
11373   // To "insert" these instructions we actually have to insert their
11374   // control-flow patterns.
11375   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11376   MachineFunction::iterator It = ++BB->getIterator();
11377 
11378   MachineFunction *F = BB->getParent();
11379 
11380   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11381       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11382       MI.getOpcode() == PPC::SELECT_I8) {
11383     SmallVector<MachineOperand, 2> Cond;
11384     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11385         MI.getOpcode() == PPC::SELECT_CC_I8)
11386       Cond.push_back(MI.getOperand(4));
11387     else
11388       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11389     Cond.push_back(MI.getOperand(1));
11390 
11391     DebugLoc dl = MI.getDebugLoc();
11392     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11393                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11394   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11395              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11396              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11397              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11398              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11399              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11400              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11401              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11402              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11403              MI.getOpcode() == PPC::SELECT_F4 ||
11404              MI.getOpcode() == PPC::SELECT_F8 ||
11405              MI.getOpcode() == PPC::SELECT_F16 ||
11406              MI.getOpcode() == PPC::SELECT_SPE ||
11407              MI.getOpcode() == PPC::SELECT_SPE4 ||
11408              MI.getOpcode() == PPC::SELECT_VRRC ||
11409              MI.getOpcode() == PPC::SELECT_VSFRC ||
11410              MI.getOpcode() == PPC::SELECT_VSSRC ||
11411              MI.getOpcode() == PPC::SELECT_VSRC) {
11412     // The incoming instruction knows the destination vreg to set, the
11413     // condition code register to branch on, the true/false values to
11414     // select between, and a branch opcode to use.
11415 
11416     //  thisMBB:
11417     //  ...
11418     //   TrueVal = ...
11419     //   cmpTY ccX, r1, r2
11420     //   bCC copy1MBB
11421     //   fallthrough --> copy0MBB
11422     MachineBasicBlock *thisMBB = BB;
11423     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11424     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11425     DebugLoc dl = MI.getDebugLoc();
11426     F->insert(It, copy0MBB);
11427     F->insert(It, sinkMBB);
11428 
11429     // Transfer the remainder of BB and its successor edges to sinkMBB.
11430     sinkMBB->splice(sinkMBB->begin(), BB,
11431                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11432     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11433 
11434     // Next, add the true and fallthrough blocks as its successors.
11435     BB->addSuccessor(copy0MBB);
11436     BB->addSuccessor(sinkMBB);
11437 
11438     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11439         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11440         MI.getOpcode() == PPC::SELECT_F16 ||
11441         MI.getOpcode() == PPC::SELECT_SPE4 ||
11442         MI.getOpcode() == PPC::SELECT_SPE ||
11443         MI.getOpcode() == PPC::SELECT_VRRC ||
11444         MI.getOpcode() == PPC::SELECT_VSFRC ||
11445         MI.getOpcode() == PPC::SELECT_VSSRC ||
11446         MI.getOpcode() == PPC::SELECT_VSRC) {
11447       BuildMI(BB, dl, TII->get(PPC::BC))
11448           .addReg(MI.getOperand(1).getReg())
11449           .addMBB(sinkMBB);
11450     } else {
11451       unsigned SelectPred = MI.getOperand(4).getImm();
11452       BuildMI(BB, dl, TII->get(PPC::BCC))
11453           .addImm(SelectPred)
11454           .addReg(MI.getOperand(1).getReg())
11455           .addMBB(sinkMBB);
11456     }
11457 
11458     //  copy0MBB:
11459     //   %FalseValue = ...
11460     //   # fallthrough to sinkMBB
11461     BB = copy0MBB;
11462 
11463     // Update machine-CFG edges
11464     BB->addSuccessor(sinkMBB);
11465 
11466     //  sinkMBB:
11467     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11468     //  ...
11469     BB = sinkMBB;
11470     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11471         .addReg(MI.getOperand(3).getReg())
11472         .addMBB(copy0MBB)
11473         .addReg(MI.getOperand(2).getReg())
11474         .addMBB(thisMBB);
11475   } else if (MI.getOpcode() == PPC::ReadTB) {
11476     // To read the 64-bit time-base register on a 32-bit target, we read the
11477     // two halves. Should the counter have wrapped while it was being read, we
11478     // need to try again.
11479     // ...
11480     // readLoop:
11481     // mfspr Rx,TBU # load from TBU
11482     // mfspr Ry,TB  # load from TB
11483     // mfspr Rz,TBU # load from TBU
11484     // cmpw crX,Rx,Rz # check if 'old'='new'
11485     // bne readLoop   # branch if they're not equal
11486     // ...
11487 
11488     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11489     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11490     DebugLoc dl = MI.getDebugLoc();
11491     F->insert(It, readMBB);
11492     F->insert(It, sinkMBB);
11493 
11494     // Transfer the remainder of BB and its successor edges to sinkMBB.
11495     sinkMBB->splice(sinkMBB->begin(), BB,
11496                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11497     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11498 
11499     BB->addSuccessor(readMBB);
11500     BB = readMBB;
11501 
11502     MachineRegisterInfo &RegInfo = F->getRegInfo();
11503     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11504     Register LoReg = MI.getOperand(0).getReg();
11505     Register HiReg = MI.getOperand(1).getReg();
11506 
11507     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11508     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11509     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11510 
11511     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11512 
11513     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11514         .addReg(HiReg)
11515         .addReg(ReadAgainReg);
11516     BuildMI(BB, dl, TII->get(PPC::BCC))
11517         .addImm(PPC::PRED_NE)
11518         .addReg(CmpReg)
11519         .addMBB(readMBB);
11520 
11521     BB->addSuccessor(readMBB);
11522     BB->addSuccessor(sinkMBB);
11523   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11524     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11525   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11526     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11527   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11528     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11529   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11530     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11531 
11532   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11533     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11534   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11535     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11536   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11537     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11538   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11539     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11540 
11541   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11542     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11543   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11544     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11545   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11546     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11547   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11548     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11549 
11550   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11551     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11552   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11553     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11554   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11555     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11556   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11557     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11558 
11559   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11560     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11561   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11562     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11563   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11564     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11565   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11566     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11567 
11568   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11569     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11570   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11571     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11572   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11573     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11574   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11575     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11576 
11577   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11578     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11579   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11580     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11581   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11582     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11583   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11584     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11585 
11586   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11587     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11588   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11589     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11590   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11591     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11592   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11593     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11594 
11595   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11596     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11597   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11598     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11599   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11600     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11601   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11602     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11603 
11604   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11605     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11606   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11607     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11608   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11609     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11610   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11611     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11612 
11613   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11614     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11615   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11616     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11617   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11618     BB = EmitAtomicBinary(MI, BB, 4, 0);
11619   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11620     BB = EmitAtomicBinary(MI, BB, 8, 0);
11621   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11622            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11623            (Subtarget.hasPartwordAtomics() &&
11624             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11625            (Subtarget.hasPartwordAtomics() &&
11626             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11627     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11628 
11629     auto LoadMnemonic = PPC::LDARX;
11630     auto StoreMnemonic = PPC::STDCX;
11631     switch (MI.getOpcode()) {
11632     default:
11633       llvm_unreachable("Compare and swap of unknown size");
11634     case PPC::ATOMIC_CMP_SWAP_I8:
11635       LoadMnemonic = PPC::LBARX;
11636       StoreMnemonic = PPC::STBCX;
11637       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11638       break;
11639     case PPC::ATOMIC_CMP_SWAP_I16:
11640       LoadMnemonic = PPC::LHARX;
11641       StoreMnemonic = PPC::STHCX;
11642       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11643       break;
11644     case PPC::ATOMIC_CMP_SWAP_I32:
11645       LoadMnemonic = PPC::LWARX;
11646       StoreMnemonic = PPC::STWCX;
11647       break;
11648     case PPC::ATOMIC_CMP_SWAP_I64:
11649       LoadMnemonic = PPC::LDARX;
11650       StoreMnemonic = PPC::STDCX;
11651       break;
11652     }
11653     Register dest = MI.getOperand(0).getReg();
11654     Register ptrA = MI.getOperand(1).getReg();
11655     Register ptrB = MI.getOperand(2).getReg();
11656     Register oldval = MI.getOperand(3).getReg();
11657     Register newval = MI.getOperand(4).getReg();
11658     DebugLoc dl = MI.getDebugLoc();
11659 
11660     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11661     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11662     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11663     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11664     F->insert(It, loop1MBB);
11665     F->insert(It, loop2MBB);
11666     F->insert(It, midMBB);
11667     F->insert(It, exitMBB);
11668     exitMBB->splice(exitMBB->begin(), BB,
11669                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11670     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11671 
11672     //  thisMBB:
11673     //   ...
11674     //   fallthrough --> loopMBB
11675     BB->addSuccessor(loop1MBB);
11676 
11677     // loop1MBB:
11678     //   l[bhwd]arx dest, ptr
11679     //   cmp[wd] dest, oldval
11680     //   bne- midMBB
11681     // loop2MBB:
11682     //   st[bhwd]cx. newval, ptr
11683     //   bne- loopMBB
11684     //   b exitBB
11685     // midMBB:
11686     //   st[bhwd]cx. dest, ptr
11687     // exitBB:
11688     BB = loop1MBB;
11689     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11690     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11691         .addReg(oldval)
11692         .addReg(dest);
11693     BuildMI(BB, dl, TII->get(PPC::BCC))
11694         .addImm(PPC::PRED_NE)
11695         .addReg(PPC::CR0)
11696         .addMBB(midMBB);
11697     BB->addSuccessor(loop2MBB);
11698     BB->addSuccessor(midMBB);
11699 
11700     BB = loop2MBB;
11701     BuildMI(BB, dl, TII->get(StoreMnemonic))
11702         .addReg(newval)
11703         .addReg(ptrA)
11704         .addReg(ptrB);
11705     BuildMI(BB, dl, TII->get(PPC::BCC))
11706         .addImm(PPC::PRED_NE)
11707         .addReg(PPC::CR0)
11708         .addMBB(loop1MBB);
11709     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11710     BB->addSuccessor(loop1MBB);
11711     BB->addSuccessor(exitMBB);
11712 
11713     BB = midMBB;
11714     BuildMI(BB, dl, TII->get(StoreMnemonic))
11715         .addReg(dest)
11716         .addReg(ptrA)
11717         .addReg(ptrB);
11718     BB->addSuccessor(exitMBB);
11719 
11720     //  exitMBB:
11721     //   ...
11722     BB = exitMBB;
11723   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11724              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11725     // We must use 64-bit registers for addresses when targeting 64-bit,
11726     // since we're actually doing arithmetic on them.  Other registers
11727     // can be 32-bit.
11728     bool is64bit = Subtarget.isPPC64();
11729     bool isLittleEndian = Subtarget.isLittleEndian();
11730     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11731 
11732     Register dest = MI.getOperand(0).getReg();
11733     Register ptrA = MI.getOperand(1).getReg();
11734     Register ptrB = MI.getOperand(2).getReg();
11735     Register oldval = MI.getOperand(3).getReg();
11736     Register newval = MI.getOperand(4).getReg();
11737     DebugLoc dl = MI.getDebugLoc();
11738 
11739     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11740     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11741     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11742     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11743     F->insert(It, loop1MBB);
11744     F->insert(It, loop2MBB);
11745     F->insert(It, midMBB);
11746     F->insert(It, exitMBB);
11747     exitMBB->splice(exitMBB->begin(), BB,
11748                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11749     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11750 
11751     MachineRegisterInfo &RegInfo = F->getRegInfo();
11752     const TargetRegisterClass *RC =
11753         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11754     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11755 
11756     Register PtrReg = RegInfo.createVirtualRegister(RC);
11757     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11758     Register ShiftReg =
11759         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11760     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11761     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11762     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11763     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11764     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11765     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11766     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11767     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11768     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11769     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11770     Register Ptr1Reg;
11771     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11772     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11773     //  thisMBB:
11774     //   ...
11775     //   fallthrough --> loopMBB
11776     BB->addSuccessor(loop1MBB);
11777 
11778     // The 4-byte load must be aligned, while a char or short may be
11779     // anywhere in the word.  Hence all this nasty bookkeeping code.
11780     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11781     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11782     //   xori shift, shift1, 24 [16]
11783     //   rlwinm ptr, ptr1, 0, 0, 29
11784     //   slw newval2, newval, shift
11785     //   slw oldval2, oldval,shift
11786     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11787     //   slw mask, mask2, shift
11788     //   and newval3, newval2, mask
11789     //   and oldval3, oldval2, mask
11790     // loop1MBB:
11791     //   lwarx tmpDest, ptr
11792     //   and tmp, tmpDest, mask
11793     //   cmpw tmp, oldval3
11794     //   bne- midMBB
11795     // loop2MBB:
11796     //   andc tmp2, tmpDest, mask
11797     //   or tmp4, tmp2, newval3
11798     //   stwcx. tmp4, ptr
11799     //   bne- loop1MBB
11800     //   b exitBB
11801     // midMBB:
11802     //   stwcx. tmpDest, ptr
11803     // exitBB:
11804     //   srw dest, tmpDest, shift
11805     if (ptrA != ZeroReg) {
11806       Ptr1Reg = RegInfo.createVirtualRegister(RC);
11807       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11808           .addReg(ptrA)
11809           .addReg(ptrB);
11810     } else {
11811       Ptr1Reg = ptrB;
11812     }
11813 
11814     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11815     // mode.
11816     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11817         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11818         .addImm(3)
11819         .addImm(27)
11820         .addImm(is8bit ? 28 : 27);
11821     if (!isLittleEndian)
11822       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11823           .addReg(Shift1Reg)
11824           .addImm(is8bit ? 24 : 16);
11825     if (is64bit)
11826       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11827           .addReg(Ptr1Reg)
11828           .addImm(0)
11829           .addImm(61);
11830     else
11831       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11832           .addReg(Ptr1Reg)
11833           .addImm(0)
11834           .addImm(0)
11835           .addImm(29);
11836     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11837         .addReg(newval)
11838         .addReg(ShiftReg);
11839     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11840         .addReg(oldval)
11841         .addReg(ShiftReg);
11842     if (is8bit)
11843       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11844     else {
11845       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11846       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11847           .addReg(Mask3Reg)
11848           .addImm(65535);
11849     }
11850     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11851         .addReg(Mask2Reg)
11852         .addReg(ShiftReg);
11853     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11854         .addReg(NewVal2Reg)
11855         .addReg(MaskReg);
11856     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11857         .addReg(OldVal2Reg)
11858         .addReg(MaskReg);
11859 
11860     BB = loop1MBB;
11861     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11862         .addReg(ZeroReg)
11863         .addReg(PtrReg);
11864     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11865         .addReg(TmpDestReg)
11866         .addReg(MaskReg);
11867     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11868         .addReg(TmpReg)
11869         .addReg(OldVal3Reg);
11870     BuildMI(BB, dl, TII->get(PPC::BCC))
11871         .addImm(PPC::PRED_NE)
11872         .addReg(PPC::CR0)
11873         .addMBB(midMBB);
11874     BB->addSuccessor(loop2MBB);
11875     BB->addSuccessor(midMBB);
11876 
11877     BB = loop2MBB;
11878     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11879         .addReg(TmpDestReg)
11880         .addReg(MaskReg);
11881     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11882         .addReg(Tmp2Reg)
11883         .addReg(NewVal3Reg);
11884     BuildMI(BB, dl, TII->get(PPC::STWCX))
11885         .addReg(Tmp4Reg)
11886         .addReg(ZeroReg)
11887         .addReg(PtrReg);
11888     BuildMI(BB, dl, TII->get(PPC::BCC))
11889         .addImm(PPC::PRED_NE)
11890         .addReg(PPC::CR0)
11891         .addMBB(loop1MBB);
11892     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11893     BB->addSuccessor(loop1MBB);
11894     BB->addSuccessor(exitMBB);
11895 
11896     BB = midMBB;
11897     BuildMI(BB, dl, TII->get(PPC::STWCX))
11898         .addReg(TmpDestReg)
11899         .addReg(ZeroReg)
11900         .addReg(PtrReg);
11901     BB->addSuccessor(exitMBB);
11902 
11903     //  exitMBB:
11904     //   ...
11905     BB = exitMBB;
11906     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11907         .addReg(TmpReg)
11908         .addReg(ShiftReg);
11909   } else if (MI.getOpcode() == PPC::FADDrtz) {
11910     // This pseudo performs an FADD with rounding mode temporarily forced
11911     // to round-to-zero.  We emit this via custom inserter since the FPSCR
11912     // is not modeled at the SelectionDAG level.
11913     Register Dest = MI.getOperand(0).getReg();
11914     Register Src1 = MI.getOperand(1).getReg();
11915     Register Src2 = MI.getOperand(2).getReg();
11916     DebugLoc dl = MI.getDebugLoc();
11917 
11918     MachineRegisterInfo &RegInfo = F->getRegInfo();
11919     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11920 
11921     // Save FPSCR value.
11922     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11923 
11924     // Set rounding mode to round-to-zero.
11925     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
11926         .addImm(31)
11927         .addReg(PPC::RM, RegState::ImplicitDefine);
11928 
11929     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
11930         .addImm(30)
11931         .addReg(PPC::RM, RegState::ImplicitDefine);
11932 
11933     // Perform addition.
11934     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
11935                    .addReg(Src1)
11936                    .addReg(Src2);
11937     if (MI.getFlag(MachineInstr::NoFPExcept))
11938       MIB.setMIFlag(MachineInstr::NoFPExcept);
11939 
11940     // Restore FPSCR value.
11941     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11942   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11943              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
11944              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11945              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
11946     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11947                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
11948                           ? PPC::ANDI8_rec
11949                           : PPC::ANDI_rec;
11950     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11951                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
11952 
11953     MachineRegisterInfo &RegInfo = F->getRegInfo();
11954     Register Dest = RegInfo.createVirtualRegister(
11955         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11956 
11957     DebugLoc Dl = MI.getDebugLoc();
11958     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
11959         .addReg(MI.getOperand(1).getReg())
11960         .addImm(1);
11961     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11962             MI.getOperand(0).getReg())
11963         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
11964   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
11965     DebugLoc Dl = MI.getDebugLoc();
11966     MachineRegisterInfo &RegInfo = F->getRegInfo();
11967     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11968     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
11969     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11970             MI.getOperand(0).getReg())
11971         .addReg(CRReg);
11972   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
11973     DebugLoc Dl = MI.getDebugLoc();
11974     unsigned Imm = MI.getOperand(1).getImm();
11975     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
11976     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
11977             MI.getOperand(0).getReg())
11978         .addReg(PPC::CR0EQ);
11979   } else if (MI.getOpcode() == PPC::SETRNDi) {
11980     DebugLoc dl = MI.getDebugLoc();
11981     Register OldFPSCRReg = MI.getOperand(0).getReg();
11982 
11983     // Save FPSCR value.
11984     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
11985 
11986     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
11987     // the following settings:
11988     //   00 Round to nearest
11989     //   01 Round to 0
11990     //   10 Round to +inf
11991     //   11 Round to -inf
11992 
11993     // When the operand is immediate, using the two least significant bits of
11994     // the immediate to set the bits 62:63 of FPSCR.
11995     unsigned Mode = MI.getOperand(1).getImm();
11996     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
11997         .addImm(31)
11998         .addReg(PPC::RM, RegState::ImplicitDefine);
11999 
12000     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12001         .addImm(30)
12002         .addReg(PPC::RM, RegState::ImplicitDefine);
12003   } else if (MI.getOpcode() == PPC::SETRND) {
12004     DebugLoc dl = MI.getDebugLoc();
12005 
12006     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12007     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12008     // If the target doesn't have DirectMove, we should use stack to do the
12009     // conversion, because the target doesn't have the instructions like mtvsrd
12010     // or mfvsrd to do this conversion directly.
12011     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12012       if (Subtarget.hasDirectMove()) {
12013         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12014           .addReg(SrcReg);
12015       } else {
12016         // Use stack to do the register copy.
12017         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12018         MachineRegisterInfo &RegInfo = F->getRegInfo();
12019         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12020         if (RC == &PPC::F8RCRegClass) {
12021           // Copy register from F8RCRegClass to G8RCRegclass.
12022           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12023                  "Unsupported RegClass.");
12024 
12025           StoreOp = PPC::STFD;
12026           LoadOp = PPC::LD;
12027         } else {
12028           // Copy register from G8RCRegClass to F8RCRegclass.
12029           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12030                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12031                  "Unsupported RegClass.");
12032         }
12033 
12034         MachineFrameInfo &MFI = F->getFrameInfo();
12035         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12036 
12037         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12038             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12039             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12040             MFI.getObjectAlign(FrameIdx));
12041 
12042         // Store the SrcReg into the stack.
12043         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12044           .addReg(SrcReg)
12045           .addImm(0)
12046           .addFrameIndex(FrameIdx)
12047           .addMemOperand(MMOStore);
12048 
12049         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12050             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12051             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12052             MFI.getObjectAlign(FrameIdx));
12053 
12054         // Load from the stack where SrcReg is stored, and save to DestReg,
12055         // so we have done the RegClass conversion from RegClass::SrcReg to
12056         // RegClass::DestReg.
12057         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12058           .addImm(0)
12059           .addFrameIndex(FrameIdx)
12060           .addMemOperand(MMOLoad);
12061       }
12062     };
12063 
12064     Register OldFPSCRReg = MI.getOperand(0).getReg();
12065 
12066     // Save FPSCR value.
12067     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12068 
12069     // When the operand is gprc register, use two least significant bits of the
12070     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12071     //
12072     // copy OldFPSCRTmpReg, OldFPSCRReg
12073     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12074     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12075     // copy NewFPSCRReg, NewFPSCRTmpReg
12076     // mtfsf 255, NewFPSCRReg
12077     MachineOperand SrcOp = MI.getOperand(1);
12078     MachineRegisterInfo &RegInfo = F->getRegInfo();
12079     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12080 
12081     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12082 
12083     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12084     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12085 
12086     // The first operand of INSERT_SUBREG should be a register which has
12087     // subregisters, we only care about its RegClass, so we should use an
12088     // IMPLICIT_DEF register.
12089     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12090     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12091       .addReg(ImDefReg)
12092       .add(SrcOp)
12093       .addImm(1);
12094 
12095     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12096     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12097       .addReg(OldFPSCRTmpReg)
12098       .addReg(ExtSrcReg)
12099       .addImm(0)
12100       .addImm(62);
12101 
12102     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12103     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12104 
12105     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12106     // bits of FPSCR.
12107     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12108       .addImm(255)
12109       .addReg(NewFPSCRReg)
12110       .addImm(0)
12111       .addImm(0);
12112   } else if (MI.getOpcode() == PPC::SETFLM) {
12113     DebugLoc Dl = MI.getDebugLoc();
12114 
12115     // Result of setflm is previous FPSCR content, so we need to save it first.
12116     Register OldFPSCRReg = MI.getOperand(0).getReg();
12117     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12118 
12119     // Put bits in 32:63 to FPSCR.
12120     Register NewFPSCRReg = MI.getOperand(1).getReg();
12121     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12122         .addImm(255)
12123         .addReg(NewFPSCRReg)
12124         .addImm(0)
12125         .addImm(0);
12126   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12127              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12128     return emitProbedAlloca(MI, BB);
12129   } else {
12130     llvm_unreachable("Unexpected instr type to insert");
12131   }
12132 
12133   MI.eraseFromParent(); // The pseudo instruction is gone now.
12134   return BB;
12135 }
12136 
12137 //===----------------------------------------------------------------------===//
12138 // Target Optimization Hooks
12139 //===----------------------------------------------------------------------===//
12140 
12141 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12142   // For the estimates, convergence is quadratic, so we essentially double the
12143   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12144   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12145   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12146   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12147   if (VT.getScalarType() == MVT::f64)
12148     RefinementSteps++;
12149   return RefinementSteps;
12150 }
12151 
12152 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12153                                             const DenormalMode &Mode) const {
12154   // We only have VSX Vector Test for software Square Root.
12155   EVT VT = Op.getValueType();
12156   if (!isTypeLegal(MVT::i1) ||
12157       (VT != MVT::f64 &&
12158        ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12159     return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
12160 
12161   SDLoc DL(Op);
12162   // The output register of FTSQRT is CR field.
12163   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12164   // ftsqrt BF,FRB
12165   // Let e_b be the unbiased exponent of the double-precision
12166   // floating-point operand in register FRB.
12167   // fe_flag is set to 1 if either of the following conditions occurs.
12168   //   - The double-precision floating-point operand in register FRB is a zero,
12169   //     a NaN, or an infinity, or a negative value.
12170   //   - e_b is less than or equal to -970.
12171   // Otherwise fe_flag is set to 0.
12172   // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
12173   // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12174   // exponent is less than -970)
12175   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12176   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12177                                     FTSQRT, SRIdxVal),
12178                  0);
12179 }
12180 
12181 SDValue
12182 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12183                                                SelectionDAG &DAG) const {
12184   // We only have VSX Vector Square Root.
12185   EVT VT = Op.getValueType();
12186   if (VT != MVT::f64 &&
12187       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12188     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
12189 
12190   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12191 }
12192 
12193 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12194                                            int Enabled, int &RefinementSteps,
12195                                            bool &UseOneConstNR,
12196                                            bool Reciprocal) const {
12197   EVT VT = Operand.getValueType();
12198   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12199       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12200       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12201       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12202     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12203       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12204 
12205     // The Newton-Raphson computation with a single constant does not provide
12206     // enough accuracy on some CPUs.
12207     UseOneConstNR = !Subtarget.needsTwoConstNR();
12208     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12209   }
12210   return SDValue();
12211 }
12212 
12213 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12214                                             int Enabled,
12215                                             int &RefinementSteps) const {
12216   EVT VT = Operand.getValueType();
12217   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12218       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12219       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12220       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12221     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12222       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12223     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12224   }
12225   return SDValue();
12226 }
12227 
12228 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12229   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12230   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12231   // enabled for division), this functionality is redundant with the default
12232   // combiner logic (once the division -> reciprocal/multiply transformation
12233   // has taken place). As a result, this matters more for older cores than for
12234   // newer ones.
12235 
12236   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12237   // reciprocal if there are two or more FDIVs (for embedded cores with only
12238   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12239   switch (Subtarget.getCPUDirective()) {
12240   default:
12241     return 3;
12242   case PPC::DIR_440:
12243   case PPC::DIR_A2:
12244   case PPC::DIR_E500:
12245   case PPC::DIR_E500mc:
12246   case PPC::DIR_E5500:
12247     return 2;
12248   }
12249 }
12250 
12251 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12252 // collapsed, and so we need to look through chains of them.
12253 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12254                                      int64_t& Offset, SelectionDAG &DAG) {
12255   if (DAG.isBaseWithConstantOffset(Loc)) {
12256     Base = Loc.getOperand(0);
12257     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12258 
12259     // The base might itself be a base plus an offset, and if so, accumulate
12260     // that as well.
12261     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12262   }
12263 }
12264 
12265 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12266                             unsigned Bytes, int Dist,
12267                             SelectionDAG &DAG) {
12268   if (VT.getSizeInBits() / 8 != Bytes)
12269     return false;
12270 
12271   SDValue BaseLoc = Base->getBasePtr();
12272   if (Loc.getOpcode() == ISD::FrameIndex) {
12273     if (BaseLoc.getOpcode() != ISD::FrameIndex)
12274       return false;
12275     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12276     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
12277     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12278     int FS  = MFI.getObjectSize(FI);
12279     int BFS = MFI.getObjectSize(BFI);
12280     if (FS != BFS || FS != (int)Bytes) return false;
12281     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12282   }
12283 
12284   SDValue Base1 = Loc, Base2 = BaseLoc;
12285   int64_t Offset1 = 0, Offset2 = 0;
12286   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12287   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12288   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12289     return true;
12290 
12291   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12292   const GlobalValue *GV1 = nullptr;
12293   const GlobalValue *GV2 = nullptr;
12294   Offset1 = 0;
12295   Offset2 = 0;
12296   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12297   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12298   if (isGA1 && isGA2 && GV1 == GV2)
12299     return Offset1 == (Offset2 + Dist*Bytes);
12300   return false;
12301 }
12302 
12303 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12304 // not enforce equality of the chain operands.
12305 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12306                             unsigned Bytes, int Dist,
12307                             SelectionDAG &DAG) {
12308   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12309     EVT VT = LS->getMemoryVT();
12310     SDValue Loc = LS->getBasePtr();
12311     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12312   }
12313 
12314   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12315     EVT VT;
12316     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12317     default: return false;
12318     case Intrinsic::ppc_altivec_lvx:
12319     case Intrinsic::ppc_altivec_lvxl:
12320     case Intrinsic::ppc_vsx_lxvw4x:
12321     case Intrinsic::ppc_vsx_lxvw4x_be:
12322       VT = MVT::v4i32;
12323       break;
12324     case Intrinsic::ppc_vsx_lxvd2x:
12325     case Intrinsic::ppc_vsx_lxvd2x_be:
12326       VT = MVT::v2f64;
12327       break;
12328     case Intrinsic::ppc_altivec_lvebx:
12329       VT = MVT::i8;
12330       break;
12331     case Intrinsic::ppc_altivec_lvehx:
12332       VT = MVT::i16;
12333       break;
12334     case Intrinsic::ppc_altivec_lvewx:
12335       VT = MVT::i32;
12336       break;
12337     }
12338 
12339     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12340   }
12341 
12342   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12343     EVT VT;
12344     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12345     default: return false;
12346     case Intrinsic::ppc_altivec_stvx:
12347     case Intrinsic::ppc_altivec_stvxl:
12348     case Intrinsic::ppc_vsx_stxvw4x:
12349       VT = MVT::v4i32;
12350       break;
12351     case Intrinsic::ppc_vsx_stxvd2x:
12352       VT = MVT::v2f64;
12353       break;
12354     case Intrinsic::ppc_vsx_stxvw4x_be:
12355       VT = MVT::v4i32;
12356       break;
12357     case Intrinsic::ppc_vsx_stxvd2x_be:
12358       VT = MVT::v2f64;
12359       break;
12360     case Intrinsic::ppc_altivec_stvebx:
12361       VT = MVT::i8;
12362       break;
12363     case Intrinsic::ppc_altivec_stvehx:
12364       VT = MVT::i16;
12365       break;
12366     case Intrinsic::ppc_altivec_stvewx:
12367       VT = MVT::i32;
12368       break;
12369     }
12370 
12371     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12372   }
12373 
12374   return false;
12375 }
12376 
12377 // Return true is there is a nearyby consecutive load to the one provided
12378 // (regardless of alignment). We search up and down the chain, looking though
12379 // token factors and other loads (but nothing else). As a result, a true result
12380 // indicates that it is safe to create a new consecutive load adjacent to the
12381 // load provided.
12382 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12383   SDValue Chain = LD->getChain();
12384   EVT VT = LD->getMemoryVT();
12385 
12386   SmallSet<SDNode *, 16> LoadRoots;
12387   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12388   SmallSet<SDNode *, 16> Visited;
12389 
12390   // First, search up the chain, branching to follow all token-factor operands.
12391   // If we find a consecutive load, then we're done, otherwise, record all
12392   // nodes just above the top-level loads and token factors.
12393   while (!Queue.empty()) {
12394     SDNode *ChainNext = Queue.pop_back_val();
12395     if (!Visited.insert(ChainNext).second)
12396       continue;
12397 
12398     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12399       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12400         return true;
12401 
12402       if (!Visited.count(ChainLD->getChain().getNode()))
12403         Queue.push_back(ChainLD->getChain().getNode());
12404     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12405       for (const SDUse &O : ChainNext->ops())
12406         if (!Visited.count(O.getNode()))
12407           Queue.push_back(O.getNode());
12408     } else
12409       LoadRoots.insert(ChainNext);
12410   }
12411 
12412   // Second, search down the chain, starting from the top-level nodes recorded
12413   // in the first phase. These top-level nodes are the nodes just above all
12414   // loads and token factors. Starting with their uses, recursively look though
12415   // all loads (just the chain uses) and token factors to find a consecutive
12416   // load.
12417   Visited.clear();
12418   Queue.clear();
12419 
12420   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12421        IE = LoadRoots.end(); I != IE; ++I) {
12422     Queue.push_back(*I);
12423 
12424     while (!Queue.empty()) {
12425       SDNode *LoadRoot = Queue.pop_back_val();
12426       if (!Visited.insert(LoadRoot).second)
12427         continue;
12428 
12429       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12430         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12431           return true;
12432 
12433       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12434            UE = LoadRoot->use_end(); UI != UE; ++UI)
12435         if (((isa<MemSDNode>(*UI) &&
12436             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12437             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12438           Queue.push_back(*UI);
12439     }
12440   }
12441 
12442   return false;
12443 }
12444 
12445 /// This function is called when we have proved that a SETCC node can be replaced
12446 /// by subtraction (and other supporting instructions) so that the result of
12447 /// comparison is kept in a GPR instead of CR. This function is purely for
12448 /// codegen purposes and has some flags to guide the codegen process.
12449 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12450                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12451   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12452 
12453   // Zero extend the operands to the largest legal integer. Originally, they
12454   // must be of a strictly smaller size.
12455   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12456                          DAG.getConstant(Size, DL, MVT::i32));
12457   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12458                          DAG.getConstant(Size, DL, MVT::i32));
12459 
12460   // Swap if needed. Depends on the condition code.
12461   if (Swap)
12462     std::swap(Op0, Op1);
12463 
12464   // Subtract extended integers.
12465   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12466 
12467   // Move the sign bit to the least significant position and zero out the rest.
12468   // Now the least significant bit carries the result of original comparison.
12469   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12470                              DAG.getConstant(Size - 1, DL, MVT::i32));
12471   auto Final = Shifted;
12472 
12473   // Complement the result if needed. Based on the condition code.
12474   if (Complement)
12475     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12476                         DAG.getConstant(1, DL, MVT::i64));
12477 
12478   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12479 }
12480 
12481 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12482                                                   DAGCombinerInfo &DCI) const {
12483   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12484 
12485   SelectionDAG &DAG = DCI.DAG;
12486   SDLoc DL(N);
12487 
12488   // Size of integers being compared has a critical role in the following
12489   // analysis, so we prefer to do this when all types are legal.
12490   if (!DCI.isAfterLegalizeDAG())
12491     return SDValue();
12492 
12493   // If all users of SETCC extend its value to a legal integer type
12494   // then we replace SETCC with a subtraction
12495   for (SDNode::use_iterator UI = N->use_begin(),
12496        UE = N->use_end(); UI != UE; ++UI) {
12497     if (UI->getOpcode() != ISD::ZERO_EXTEND)
12498       return SDValue();
12499   }
12500 
12501   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12502   auto OpSize = N->getOperand(0).getValueSizeInBits();
12503 
12504   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12505 
12506   if (OpSize < Size) {
12507     switch (CC) {
12508     default: break;
12509     case ISD::SETULT:
12510       return generateEquivalentSub(N, Size, false, false, DL, DAG);
12511     case ISD::SETULE:
12512       return generateEquivalentSub(N, Size, true, true, DL, DAG);
12513     case ISD::SETUGT:
12514       return generateEquivalentSub(N, Size, false, true, DL, DAG);
12515     case ISD::SETUGE:
12516       return generateEquivalentSub(N, Size, true, false, DL, DAG);
12517     }
12518   }
12519 
12520   return SDValue();
12521 }
12522 
12523 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
12524                                                   DAGCombinerInfo &DCI) const {
12525   SelectionDAG &DAG = DCI.DAG;
12526   SDLoc dl(N);
12527 
12528   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
12529   // If we're tracking CR bits, we need to be careful that we don't have:
12530   //   trunc(binary-ops(zext(x), zext(y)))
12531   // or
12532   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
12533   // such that we're unnecessarily moving things into GPRs when it would be
12534   // better to keep them in CR bits.
12535 
12536   // Note that trunc here can be an actual i1 trunc, or can be the effective
12537   // truncation that comes from a setcc or select_cc.
12538   if (N->getOpcode() == ISD::TRUNCATE &&
12539       N->getValueType(0) != MVT::i1)
12540     return SDValue();
12541 
12542   if (N->getOperand(0).getValueType() != MVT::i32 &&
12543       N->getOperand(0).getValueType() != MVT::i64)
12544     return SDValue();
12545 
12546   if (N->getOpcode() == ISD::SETCC ||
12547       N->getOpcode() == ISD::SELECT_CC) {
12548     // If we're looking at a comparison, then we need to make sure that the
12549     // high bits (all except for the first) don't matter the result.
12550     ISD::CondCode CC =
12551       cast<CondCodeSDNode>(N->getOperand(
12552         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
12553     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
12554 
12555     if (ISD::isSignedIntSetCC(CC)) {
12556       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
12557           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
12558         return SDValue();
12559     } else if (ISD::isUnsignedIntSetCC(CC)) {
12560       if (!DAG.MaskedValueIsZero(N->getOperand(0),
12561                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
12562           !DAG.MaskedValueIsZero(N->getOperand(1),
12563                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
12564         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
12565                                              : SDValue());
12566     } else {
12567       // This is neither a signed nor an unsigned comparison, just make sure
12568       // that the high bits are equal.
12569       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
12570       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
12571 
12572       // We don't really care about what is known about the first bit (if
12573       // anything), so pretend that it is known zero for both to ensure they can
12574       // be compared as constants.
12575       Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
12576       Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
12577 
12578       if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
12579           Op1Known.getConstant() != Op2Known.getConstant())
12580         return SDValue();
12581     }
12582   }
12583 
12584   // We now know that the higher-order bits are irrelevant, we just need to
12585   // make sure that all of the intermediate operations are bit operations, and
12586   // all inputs are extensions.
12587   if (N->getOperand(0).getOpcode() != ISD::AND &&
12588       N->getOperand(0).getOpcode() != ISD::OR  &&
12589       N->getOperand(0).getOpcode() != ISD::XOR &&
12590       N->getOperand(0).getOpcode() != ISD::SELECT &&
12591       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
12592       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
12593       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
12594       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
12595       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
12596     return SDValue();
12597 
12598   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
12599       N->getOperand(1).getOpcode() != ISD::AND &&
12600       N->getOperand(1).getOpcode() != ISD::OR  &&
12601       N->getOperand(1).getOpcode() != ISD::XOR &&
12602       N->getOperand(1).getOpcode() != ISD::SELECT &&
12603       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
12604       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
12605       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
12606       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
12607       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
12608     return SDValue();
12609 
12610   SmallVector<SDValue, 4> Inputs;
12611   SmallVector<SDValue, 8> BinOps, PromOps;
12612   SmallPtrSet<SDNode *, 16> Visited;
12613 
12614   for (unsigned i = 0; i < 2; ++i) {
12615     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12616           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12617           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12618           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12619         isa<ConstantSDNode>(N->getOperand(i)))
12620       Inputs.push_back(N->getOperand(i));
12621     else
12622       BinOps.push_back(N->getOperand(i));
12623 
12624     if (N->getOpcode() == ISD::TRUNCATE)
12625       break;
12626   }
12627 
12628   // Visit all inputs, collect all binary operations (and, or, xor and
12629   // select) that are all fed by extensions.
12630   while (!BinOps.empty()) {
12631     SDValue BinOp = BinOps.pop_back_val();
12632 
12633     if (!Visited.insert(BinOp.getNode()).second)
12634       continue;
12635 
12636     PromOps.push_back(BinOp);
12637 
12638     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12639       // The condition of the select is not promoted.
12640       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12641         continue;
12642       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12643         continue;
12644 
12645       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12646             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12647             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12648            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12649           isa<ConstantSDNode>(BinOp.getOperand(i))) {
12650         Inputs.push_back(BinOp.getOperand(i));
12651       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12652                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
12653                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12654                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12655                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
12656                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12657                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12658                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12659                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
12660         BinOps.push_back(BinOp.getOperand(i));
12661       } else {
12662         // We have an input that is not an extension or another binary
12663         // operation; we'll abort this transformation.
12664         return SDValue();
12665       }
12666     }
12667   }
12668 
12669   // Make sure that this is a self-contained cluster of operations (which
12670   // is not quite the same thing as saying that everything has only one
12671   // use).
12672   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12673     if (isa<ConstantSDNode>(Inputs[i]))
12674       continue;
12675 
12676     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12677                               UE = Inputs[i].getNode()->use_end();
12678          UI != UE; ++UI) {
12679       SDNode *User = *UI;
12680       if (User != N && !Visited.count(User))
12681         return SDValue();
12682 
12683       // Make sure that we're not going to promote the non-output-value
12684       // operand(s) or SELECT or SELECT_CC.
12685       // FIXME: Although we could sometimes handle this, and it does occur in
12686       // practice that one of the condition inputs to the select is also one of
12687       // the outputs, we currently can't deal with this.
12688       if (User->getOpcode() == ISD::SELECT) {
12689         if (User->getOperand(0) == Inputs[i])
12690           return SDValue();
12691       } else if (User->getOpcode() == ISD::SELECT_CC) {
12692         if (User->getOperand(0) == Inputs[i] ||
12693             User->getOperand(1) == Inputs[i])
12694           return SDValue();
12695       }
12696     }
12697   }
12698 
12699   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12700     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12701                               UE = PromOps[i].getNode()->use_end();
12702          UI != UE; ++UI) {
12703       SDNode *User = *UI;
12704       if (User != N && !Visited.count(User))
12705         return SDValue();
12706 
12707       // Make sure that we're not going to promote the non-output-value
12708       // operand(s) or SELECT or SELECT_CC.
12709       // FIXME: Although we could sometimes handle this, and it does occur in
12710       // practice that one of the condition inputs to the select is also one of
12711       // the outputs, we currently can't deal with this.
12712       if (User->getOpcode() == ISD::SELECT) {
12713         if (User->getOperand(0) == PromOps[i])
12714           return SDValue();
12715       } else if (User->getOpcode() == ISD::SELECT_CC) {
12716         if (User->getOperand(0) == PromOps[i] ||
12717             User->getOperand(1) == PromOps[i])
12718           return SDValue();
12719       }
12720     }
12721   }
12722 
12723   // Replace all inputs with the extension operand.
12724   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12725     // Constants may have users outside the cluster of to-be-promoted nodes,
12726     // and so we need to replace those as we do the promotions.
12727     if (isa<ConstantSDNode>(Inputs[i]))
12728       continue;
12729     else
12730       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12731   }
12732 
12733   std::list<HandleSDNode> PromOpHandles;
12734   for (auto &PromOp : PromOps)
12735     PromOpHandles.emplace_back(PromOp);
12736 
12737   // Replace all operations (these are all the same, but have a different
12738   // (i1) return type). DAG.getNode will validate that the types of
12739   // a binary operator match, so go through the list in reverse so that
12740   // we've likely promoted both operands first. Any intermediate truncations or
12741   // extensions disappear.
12742   while (!PromOpHandles.empty()) {
12743     SDValue PromOp = PromOpHandles.back().getValue();
12744     PromOpHandles.pop_back();
12745 
12746     if (PromOp.getOpcode() == ISD::TRUNCATE ||
12747         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12748         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12749         PromOp.getOpcode() == ISD::ANY_EXTEND) {
12750       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12751           PromOp.getOperand(0).getValueType() != MVT::i1) {
12752         // The operand is not yet ready (see comment below).
12753         PromOpHandles.emplace_front(PromOp);
12754         continue;
12755       }
12756 
12757       SDValue RepValue = PromOp.getOperand(0);
12758       if (isa<ConstantSDNode>(RepValue))
12759         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12760 
12761       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12762       continue;
12763     }
12764 
12765     unsigned C;
12766     switch (PromOp.getOpcode()) {
12767     default:             C = 0; break;
12768     case ISD::SELECT:    C = 1; break;
12769     case ISD::SELECT_CC: C = 2; break;
12770     }
12771 
12772     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12773          PromOp.getOperand(C).getValueType() != MVT::i1) ||
12774         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12775          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12776       // The to-be-promoted operands of this node have not yet been
12777       // promoted (this should be rare because we're going through the
12778       // list backward, but if one of the operands has several users in
12779       // this cluster of to-be-promoted nodes, it is possible).
12780       PromOpHandles.emplace_front(PromOp);
12781       continue;
12782     }
12783 
12784     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12785                                 PromOp.getNode()->op_end());
12786 
12787     // If there are any constant inputs, make sure they're replaced now.
12788     for (unsigned i = 0; i < 2; ++i)
12789       if (isa<ConstantSDNode>(Ops[C+i]))
12790         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12791 
12792     DAG.ReplaceAllUsesOfValueWith(PromOp,
12793       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12794   }
12795 
12796   // Now we're left with the initial truncation itself.
12797   if (N->getOpcode() == ISD::TRUNCATE)
12798     return N->getOperand(0);
12799 
12800   // Otherwise, this is a comparison. The operands to be compared have just
12801   // changed type (to i1), but everything else is the same.
12802   return SDValue(N, 0);
12803 }
12804 
12805 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12806                                                   DAGCombinerInfo &DCI) const {
12807   SelectionDAG &DAG = DCI.DAG;
12808   SDLoc dl(N);
12809 
12810   // If we're tracking CR bits, we need to be careful that we don't have:
12811   //   zext(binary-ops(trunc(x), trunc(y)))
12812   // or
12813   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12814   // such that we're unnecessarily moving things into CR bits that can more
12815   // efficiently stay in GPRs. Note that if we're not certain that the high
12816   // bits are set as required by the final extension, we still may need to do
12817   // some masking to get the proper behavior.
12818 
12819   // This same functionality is important on PPC64 when dealing with
12820   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12821   // the return values of functions. Because it is so similar, it is handled
12822   // here as well.
12823 
12824   if (N->getValueType(0) != MVT::i32 &&
12825       N->getValueType(0) != MVT::i64)
12826     return SDValue();
12827 
12828   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12829         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12830     return SDValue();
12831 
12832   if (N->getOperand(0).getOpcode() != ISD::AND &&
12833       N->getOperand(0).getOpcode() != ISD::OR  &&
12834       N->getOperand(0).getOpcode() != ISD::XOR &&
12835       N->getOperand(0).getOpcode() != ISD::SELECT &&
12836       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12837     return SDValue();
12838 
12839   SmallVector<SDValue, 4> Inputs;
12840   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12841   SmallPtrSet<SDNode *, 16> Visited;
12842 
12843   // Visit all inputs, collect all binary operations (and, or, xor and
12844   // select) that are all fed by truncations.
12845   while (!BinOps.empty()) {
12846     SDValue BinOp = BinOps.pop_back_val();
12847 
12848     if (!Visited.insert(BinOp.getNode()).second)
12849       continue;
12850 
12851     PromOps.push_back(BinOp);
12852 
12853     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12854       // The condition of the select is not promoted.
12855       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12856         continue;
12857       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12858         continue;
12859 
12860       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12861           isa<ConstantSDNode>(BinOp.getOperand(i))) {
12862         Inputs.push_back(BinOp.getOperand(i));
12863       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12864                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
12865                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12866                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12867                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12868         BinOps.push_back(BinOp.getOperand(i));
12869       } else {
12870         // We have an input that is not a truncation or another binary
12871         // operation; we'll abort this transformation.
12872         return SDValue();
12873       }
12874     }
12875   }
12876 
12877   // The operands of a select that must be truncated when the select is
12878   // promoted because the operand is actually part of the to-be-promoted set.
12879   DenseMap<SDNode *, EVT> SelectTruncOp[2];
12880 
12881   // Make sure that this is a self-contained cluster of operations (which
12882   // is not quite the same thing as saying that everything has only one
12883   // use).
12884   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12885     if (isa<ConstantSDNode>(Inputs[i]))
12886       continue;
12887 
12888     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12889                               UE = Inputs[i].getNode()->use_end();
12890          UI != UE; ++UI) {
12891       SDNode *User = *UI;
12892       if (User != N && !Visited.count(User))
12893         return SDValue();
12894 
12895       // If we're going to promote the non-output-value operand(s) or SELECT or
12896       // SELECT_CC, record them for truncation.
12897       if (User->getOpcode() == ISD::SELECT) {
12898         if (User->getOperand(0) == Inputs[i])
12899           SelectTruncOp[0].insert(std::make_pair(User,
12900                                     User->getOperand(0).getValueType()));
12901       } else if (User->getOpcode() == ISD::SELECT_CC) {
12902         if (User->getOperand(0) == Inputs[i])
12903           SelectTruncOp[0].insert(std::make_pair(User,
12904                                     User->getOperand(0).getValueType()));
12905         if (User->getOperand(1) == Inputs[i])
12906           SelectTruncOp[1].insert(std::make_pair(User,
12907                                     User->getOperand(1).getValueType()));
12908       }
12909     }
12910   }
12911 
12912   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12913     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12914                               UE = PromOps[i].getNode()->use_end();
12915          UI != UE; ++UI) {
12916       SDNode *User = *UI;
12917       if (User != N && !Visited.count(User))
12918         return SDValue();
12919 
12920       // If we're going to promote the non-output-value operand(s) or SELECT or
12921       // SELECT_CC, record them for truncation.
12922       if (User->getOpcode() == ISD::SELECT) {
12923         if (User->getOperand(0) == PromOps[i])
12924           SelectTruncOp[0].insert(std::make_pair(User,
12925                                     User->getOperand(0).getValueType()));
12926       } else if (User->getOpcode() == ISD::SELECT_CC) {
12927         if (User->getOperand(0) == PromOps[i])
12928           SelectTruncOp[0].insert(std::make_pair(User,
12929                                     User->getOperand(0).getValueType()));
12930         if (User->getOperand(1) == PromOps[i])
12931           SelectTruncOp[1].insert(std::make_pair(User,
12932                                     User->getOperand(1).getValueType()));
12933       }
12934     }
12935   }
12936 
12937   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12938   bool ReallyNeedsExt = false;
12939   if (N->getOpcode() != ISD::ANY_EXTEND) {
12940     // If all of the inputs are not already sign/zero extended, then
12941     // we'll still need to do that at the end.
12942     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12943       if (isa<ConstantSDNode>(Inputs[i]))
12944         continue;
12945 
12946       unsigned OpBits =
12947         Inputs[i].getOperand(0).getValueSizeInBits();
12948       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12949 
12950       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12951            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12952                                   APInt::getHighBitsSet(OpBits,
12953                                                         OpBits-PromBits))) ||
12954           (N->getOpcode() == ISD::SIGN_EXTEND &&
12955            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12956              (OpBits-(PromBits-1)))) {
12957         ReallyNeedsExt = true;
12958         break;
12959       }
12960     }
12961   }
12962 
12963   // Replace all inputs, either with the truncation operand, or a
12964   // truncation or extension to the final output type.
12965   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12966     // Constant inputs need to be replaced with the to-be-promoted nodes that
12967     // use them because they might have users outside of the cluster of
12968     // promoted nodes.
12969     if (isa<ConstantSDNode>(Inputs[i]))
12970       continue;
12971 
12972     SDValue InSrc = Inputs[i].getOperand(0);
12973     if (Inputs[i].getValueType() == N->getValueType(0))
12974       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
12975     else if (N->getOpcode() == ISD::SIGN_EXTEND)
12976       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12977         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
12978     else if (N->getOpcode() == ISD::ZERO_EXTEND)
12979       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12980         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
12981     else
12982       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
12983         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
12984   }
12985 
12986   std::list<HandleSDNode> PromOpHandles;
12987   for (auto &PromOp : PromOps)
12988     PromOpHandles.emplace_back(PromOp);
12989 
12990   // Replace all operations (these are all the same, but have a different
12991   // (promoted) return type). DAG.getNode will validate that the types of
12992   // a binary operator match, so go through the list in reverse so that
12993   // we've likely promoted both operands first.
12994   while (!PromOpHandles.empty()) {
12995     SDValue PromOp = PromOpHandles.back().getValue();
12996     PromOpHandles.pop_back();
12997 
12998     unsigned C;
12999     switch (PromOp.getOpcode()) {
13000     default:             C = 0; break;
13001     case ISD::SELECT:    C = 1; break;
13002     case ISD::SELECT_CC: C = 2; break;
13003     }
13004 
13005     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13006          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13007         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13008          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13009       // The to-be-promoted operands of this node have not yet been
13010       // promoted (this should be rare because we're going through the
13011       // list backward, but if one of the operands has several users in
13012       // this cluster of to-be-promoted nodes, it is possible).
13013       PromOpHandles.emplace_front(PromOp);
13014       continue;
13015     }
13016 
13017     // For SELECT and SELECT_CC nodes, we do a similar check for any
13018     // to-be-promoted comparison inputs.
13019     if (PromOp.getOpcode() == ISD::SELECT ||
13020         PromOp.getOpcode() == ISD::SELECT_CC) {
13021       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13022            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13023           (SelectTruncOp[1].count(PromOp.getNode()) &&
13024            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13025         PromOpHandles.emplace_front(PromOp);
13026         continue;
13027       }
13028     }
13029 
13030     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13031                                 PromOp.getNode()->op_end());
13032 
13033     // If this node has constant inputs, then they'll need to be promoted here.
13034     for (unsigned i = 0; i < 2; ++i) {
13035       if (!isa<ConstantSDNode>(Ops[C+i]))
13036         continue;
13037       if (Ops[C+i].getValueType() == N->getValueType(0))
13038         continue;
13039 
13040       if (N->getOpcode() == ISD::SIGN_EXTEND)
13041         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13042       else if (N->getOpcode() == ISD::ZERO_EXTEND)
13043         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13044       else
13045         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13046     }
13047 
13048     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13049     // truncate them again to the original value type.
13050     if (PromOp.getOpcode() == ISD::SELECT ||
13051         PromOp.getOpcode() == ISD::SELECT_CC) {
13052       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13053       if (SI0 != SelectTruncOp[0].end())
13054         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13055       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13056       if (SI1 != SelectTruncOp[1].end())
13057         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13058     }
13059 
13060     DAG.ReplaceAllUsesOfValueWith(PromOp,
13061       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13062   }
13063 
13064   // Now we're left with the initial extension itself.
13065   if (!ReallyNeedsExt)
13066     return N->getOperand(0);
13067 
13068   // To zero extend, just mask off everything except for the first bit (in the
13069   // i1 case).
13070   if (N->getOpcode() == ISD::ZERO_EXTEND)
13071     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13072                        DAG.getConstant(APInt::getLowBitsSet(
13073                                          N->getValueSizeInBits(0), PromBits),
13074                                        dl, N->getValueType(0)));
13075 
13076   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13077          "Invalid extension type");
13078   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13079   SDValue ShiftCst =
13080       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13081   return DAG.getNode(
13082       ISD::SRA, dl, N->getValueType(0),
13083       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13084       ShiftCst);
13085 }
13086 
13087 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13088                                         DAGCombinerInfo &DCI) const {
13089   assert(N->getOpcode() == ISD::SETCC &&
13090          "Should be called with a SETCC node");
13091 
13092   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13093   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13094     SDValue LHS = N->getOperand(0);
13095     SDValue RHS = N->getOperand(1);
13096 
13097     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13098     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13099         LHS.hasOneUse())
13100       std::swap(LHS, RHS);
13101 
13102     // x == 0-y --> x+y == 0
13103     // x != 0-y --> x+y != 0
13104     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13105         RHS.hasOneUse()) {
13106       SDLoc DL(N);
13107       SelectionDAG &DAG = DCI.DAG;
13108       EVT VT = N->getValueType(0);
13109       EVT OpVT = LHS.getValueType();
13110       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13111       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13112     }
13113   }
13114 
13115   return DAGCombineTruncBoolExt(N, DCI);
13116 }
13117 
13118 // Is this an extending load from an f32 to an f64?
13119 static bool isFPExtLoad(SDValue Op) {
13120   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13121     return LD->getExtensionType() == ISD::EXTLOAD &&
13122       Op.getValueType() == MVT::f64;
13123   return false;
13124 }
13125 
13126 /// Reduces the number of fp-to-int conversion when building a vector.
13127 ///
13128 /// If this vector is built out of floating to integer conversions,
13129 /// transform it to a vector built out of floating point values followed by a
13130 /// single floating to integer conversion of the vector.
13131 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
13132 /// becomes (fptosi (build_vector ($A, $B, ...)))
13133 SDValue PPCTargetLowering::
13134 combineElementTruncationToVectorTruncation(SDNode *N,
13135                                            DAGCombinerInfo &DCI) const {
13136   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13137          "Should be called with a BUILD_VECTOR node");
13138 
13139   SelectionDAG &DAG = DCI.DAG;
13140   SDLoc dl(N);
13141 
13142   SDValue FirstInput = N->getOperand(0);
13143   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13144          "The input operand must be an fp-to-int conversion.");
13145 
13146   // This combine happens after legalization so the fp_to_[su]i nodes are
13147   // already converted to PPCSISD nodes.
13148   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13149   if (FirstConversion == PPCISD::FCTIDZ ||
13150       FirstConversion == PPCISD::FCTIDUZ ||
13151       FirstConversion == PPCISD::FCTIWZ ||
13152       FirstConversion == PPCISD::FCTIWUZ) {
13153     bool IsSplat = true;
13154     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13155       FirstConversion == PPCISD::FCTIWUZ;
13156     EVT SrcVT = FirstInput.getOperand(0).getValueType();
13157     SmallVector<SDValue, 4> Ops;
13158     EVT TargetVT = N->getValueType(0);
13159     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13160       SDValue NextOp = N->getOperand(i);
13161       if (NextOp.getOpcode() != PPCISD::MFVSR)
13162         return SDValue();
13163       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13164       if (NextConversion != FirstConversion)
13165         return SDValue();
13166       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13167       // This is not valid if the input was originally double precision. It is
13168       // also not profitable to do unless this is an extending load in which
13169       // case doing this combine will allow us to combine consecutive loads.
13170       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13171         return SDValue();
13172       if (N->getOperand(i) != FirstInput)
13173         IsSplat = false;
13174     }
13175 
13176     // If this is a splat, we leave it as-is since there will be only a single
13177     // fp-to-int conversion followed by a splat of the integer. This is better
13178     // for 32-bit and smaller ints and neutral for 64-bit ints.
13179     if (IsSplat)
13180       return SDValue();
13181 
13182     // Now that we know we have the right type of node, get its operands
13183     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13184       SDValue In = N->getOperand(i).getOperand(0);
13185       if (Is32Bit) {
13186         // For 32-bit values, we need to add an FP_ROUND node (if we made it
13187         // here, we know that all inputs are extending loads so this is safe).
13188         if (In.isUndef())
13189           Ops.push_back(DAG.getUNDEF(SrcVT));
13190         else {
13191           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13192                                       MVT::f32, In.getOperand(0),
13193                                       DAG.getIntPtrConstant(1, dl));
13194           Ops.push_back(Trunc);
13195         }
13196       } else
13197         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13198     }
13199 
13200     unsigned Opcode;
13201     if (FirstConversion == PPCISD::FCTIDZ ||
13202         FirstConversion == PPCISD::FCTIWZ)
13203       Opcode = ISD::FP_TO_SINT;
13204     else
13205       Opcode = ISD::FP_TO_UINT;
13206 
13207     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13208     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13209     return DAG.getNode(Opcode, dl, TargetVT, BV);
13210   }
13211   return SDValue();
13212 }
13213 
13214 /// Reduce the number of loads when building a vector.
13215 ///
13216 /// Building a vector out of multiple loads can be converted to a load
13217 /// of the vector type if the loads are consecutive. If the loads are
13218 /// consecutive but in descending order, a shuffle is added at the end
13219 /// to reorder the vector.
13220 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13221   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13222          "Should be called with a BUILD_VECTOR node");
13223 
13224   SDLoc dl(N);
13225 
13226   // Return early for non byte-sized type, as they can't be consecutive.
13227   if (!N->getValueType(0).getVectorElementType().isByteSized())
13228     return SDValue();
13229 
13230   bool InputsAreConsecutiveLoads = true;
13231   bool InputsAreReverseConsecutive = true;
13232   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13233   SDValue FirstInput = N->getOperand(0);
13234   bool IsRoundOfExtLoad = false;
13235 
13236   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13237       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13238     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13239     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13240   }
13241   // Not a build vector of (possibly fp_rounded) loads.
13242   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13243       N->getNumOperands() == 1)
13244     return SDValue();
13245 
13246   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13247     // If any inputs are fp_round(extload), they all must be.
13248     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13249       return SDValue();
13250 
13251     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13252       N->getOperand(i);
13253     if (NextInput.getOpcode() != ISD::LOAD)
13254       return SDValue();
13255 
13256     SDValue PreviousInput =
13257       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13258     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13259     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13260 
13261     // If any inputs are fp_round(extload), they all must be.
13262     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13263       return SDValue();
13264 
13265     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13266       InputsAreConsecutiveLoads = false;
13267     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13268       InputsAreReverseConsecutive = false;
13269 
13270     // Exit early if the loads are neither consecutive nor reverse consecutive.
13271     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13272       return SDValue();
13273   }
13274 
13275   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13276          "The loads cannot be both consecutive and reverse consecutive.");
13277 
13278   SDValue FirstLoadOp =
13279     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13280   SDValue LastLoadOp =
13281     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13282                        N->getOperand(N->getNumOperands()-1);
13283 
13284   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13285   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13286   if (InputsAreConsecutiveLoads) {
13287     assert(LD1 && "Input needs to be a LoadSDNode.");
13288     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13289                        LD1->getBasePtr(), LD1->getPointerInfo(),
13290                        LD1->getAlignment());
13291   }
13292   if (InputsAreReverseConsecutive) {
13293     assert(LDL && "Input needs to be a LoadSDNode.");
13294     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13295                                LDL->getBasePtr(), LDL->getPointerInfo(),
13296                                LDL->getAlignment());
13297     SmallVector<int, 16> Ops;
13298     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13299       Ops.push_back(i);
13300 
13301     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13302                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13303   }
13304   return SDValue();
13305 }
13306 
13307 // This function adds the required vector_shuffle needed to get
13308 // the elements of the vector extract in the correct position
13309 // as specified by the CorrectElems encoding.
13310 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13311                                       SDValue Input, uint64_t Elems,
13312                                       uint64_t CorrectElems) {
13313   SDLoc dl(N);
13314 
13315   unsigned NumElems = Input.getValueType().getVectorNumElements();
13316   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13317 
13318   // Knowing the element indices being extracted from the original
13319   // vector and the order in which they're being inserted, just put
13320   // them at element indices required for the instruction.
13321   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13322     if (DAG.getDataLayout().isLittleEndian())
13323       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13324     else
13325       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13326     CorrectElems = CorrectElems >> 8;
13327     Elems = Elems >> 8;
13328   }
13329 
13330   SDValue Shuffle =
13331       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13332                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13333 
13334   EVT VT = N->getValueType(0);
13335   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13336 
13337   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13338                                Input.getValueType().getVectorElementType(),
13339                                VT.getVectorNumElements());
13340   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13341                      DAG.getValueType(ExtVT));
13342 }
13343 
13344 // Look for build vector patterns where input operands come from sign
13345 // extended vector_extract elements of specific indices. If the correct indices
13346 // aren't used, add a vector shuffle to fix up the indices and create
13347 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13348 // during instruction selection.
13349 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13350   // This array encodes the indices that the vector sign extend instructions
13351   // extract from when extending from one type to another for both BE and LE.
13352   // The right nibble of each byte corresponds to the LE incides.
13353   // and the left nibble of each byte corresponds to the BE incides.
13354   // For example: 0x3074B8FC  byte->word
13355   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13356   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13357   // For example: 0x000070F8  byte->double word
13358   // For LE: the allowed indices are: 0x0,0x8
13359   // For BE: the allowed indices are: 0x7,0xF
13360   uint64_t TargetElems[] = {
13361       0x3074B8FC, // b->w
13362       0x000070F8, // b->d
13363       0x10325476, // h->w
13364       0x00003074, // h->d
13365       0x00001032, // w->d
13366   };
13367 
13368   uint64_t Elems = 0;
13369   int Index;
13370   SDValue Input;
13371 
13372   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13373     if (!Op)
13374       return false;
13375     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13376         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13377       return false;
13378 
13379     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13380     // of the right width.
13381     SDValue Extract = Op.getOperand(0);
13382     if (Extract.getOpcode() == ISD::ANY_EXTEND)
13383       Extract = Extract.getOperand(0);
13384     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13385       return false;
13386 
13387     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13388     if (!ExtOp)
13389       return false;
13390 
13391     Index = ExtOp->getZExtValue();
13392     if (Input && Input != Extract.getOperand(0))
13393       return false;
13394 
13395     if (!Input)
13396       Input = Extract.getOperand(0);
13397 
13398     Elems = Elems << 8;
13399     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13400     Elems |= Index;
13401 
13402     return true;
13403   };
13404 
13405   // If the build vector operands aren't sign extended vector extracts,
13406   // of the same input vector, then return.
13407   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13408     if (!isSExtOfVecExtract(N->getOperand(i))) {
13409       return SDValue();
13410     }
13411   }
13412 
13413   // If the vector extract indicies are not correct, add the appropriate
13414   // vector_shuffle.
13415   int TgtElemArrayIdx;
13416   int InputSize = Input.getValueType().getScalarSizeInBits();
13417   int OutputSize = N->getValueType(0).getScalarSizeInBits();
13418   if (InputSize + OutputSize == 40)
13419     TgtElemArrayIdx = 0;
13420   else if (InputSize + OutputSize == 72)
13421     TgtElemArrayIdx = 1;
13422   else if (InputSize + OutputSize == 48)
13423     TgtElemArrayIdx = 2;
13424   else if (InputSize + OutputSize == 80)
13425     TgtElemArrayIdx = 3;
13426   else if (InputSize + OutputSize == 96)
13427     TgtElemArrayIdx = 4;
13428   else
13429     return SDValue();
13430 
13431   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13432   CorrectElems = DAG.getDataLayout().isLittleEndian()
13433                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13434                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13435   if (Elems != CorrectElems) {
13436     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13437   }
13438 
13439   // Regular lowering will catch cases where a shuffle is not needed.
13440   return SDValue();
13441 }
13442 
13443 // Look for the pattern of a load from a narrow width to i128, feeding
13444 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13445 // (LXVRZX). This node represents a zero extending load that will be matched
13446 // to the Load VSX Vector Rightmost instructions.
13447 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13448   SDLoc DL(N);
13449 
13450   // This combine is only eligible for a BUILD_VECTOR of v1i128.
13451   if (N->getValueType(0) != MVT::v1i128)
13452     return SDValue();
13453 
13454   SDValue Operand = N->getOperand(0);
13455   // Proceed with the transformation if the operand to the BUILD_VECTOR
13456   // is a load instruction.
13457   if (Operand.getOpcode() != ISD::LOAD)
13458     return SDValue();
13459 
13460   LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13461   EVT MemoryType = LD->getMemoryVT();
13462 
13463   // This transformation is only valid if the we are loading either a byte,
13464   // halfword, word, or doubleword.
13465   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13466                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
13467 
13468   // Ensure that the load from the narrow width is being zero extended to i128.
13469   if (!ValidLDType ||
13470       (LD->getExtensionType() != ISD::ZEXTLOAD &&
13471        LD->getExtensionType() != ISD::EXTLOAD))
13472     return SDValue();
13473 
13474   SDValue LoadOps[] = {
13475       LD->getChain(), LD->getBasePtr(),
13476       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13477 
13478   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13479                                  DAG.getVTList(MVT::v1i128, MVT::Other),
13480                                  LoadOps, MemoryType, LD->getMemOperand());
13481 }
13482 
13483 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13484                                                  DAGCombinerInfo &DCI) const {
13485   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13486          "Should be called with a BUILD_VECTOR node");
13487 
13488   SelectionDAG &DAG = DCI.DAG;
13489   SDLoc dl(N);
13490 
13491   if (!Subtarget.hasVSX())
13492     return SDValue();
13493 
13494   // The target independent DAG combiner will leave a build_vector of
13495   // float-to-int conversions intact. We can generate MUCH better code for
13496   // a float-to-int conversion of a vector of floats.
13497   SDValue FirstInput = N->getOperand(0);
13498   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13499     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13500     if (Reduced)
13501       return Reduced;
13502   }
13503 
13504   // If we're building a vector out of consecutive loads, just load that
13505   // vector type.
13506   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13507   if (Reduced)
13508     return Reduced;
13509 
13510   // If we're building a vector out of extended elements from another vector
13511   // we have P9 vector integer extend instructions. The code assumes legal
13512   // input types (i.e. it can't handle things like v4i16) so do not run before
13513   // legalization.
13514   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13515     Reduced = combineBVOfVecSExt(N, DAG);
13516     if (Reduced)
13517       return Reduced;
13518   }
13519 
13520   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13521   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13522   // is a load from <valid narrow width> to i128.
13523   if (Subtarget.isISA3_1()) {
13524     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
13525     if (BVOfZLoad)
13526       return BVOfZLoad;
13527   }
13528 
13529   if (N->getValueType(0) != MVT::v2f64)
13530     return SDValue();
13531 
13532   // Looking for:
13533   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
13534   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
13535       FirstInput.getOpcode() != ISD::UINT_TO_FP)
13536     return SDValue();
13537   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
13538       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
13539     return SDValue();
13540   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
13541     return SDValue();
13542 
13543   SDValue Ext1 = FirstInput.getOperand(0);
13544   SDValue Ext2 = N->getOperand(1).getOperand(0);
13545   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13546      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13547     return SDValue();
13548 
13549   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
13550   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
13551   if (!Ext1Op || !Ext2Op)
13552     return SDValue();
13553   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
13554       Ext1.getOperand(0) != Ext2.getOperand(0))
13555     return SDValue();
13556 
13557   int FirstElem = Ext1Op->getZExtValue();
13558   int SecondElem = Ext2Op->getZExtValue();
13559   int SubvecIdx;
13560   if (FirstElem == 0 && SecondElem == 1)
13561     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
13562   else if (FirstElem == 2 && SecondElem == 3)
13563     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
13564   else
13565     return SDValue();
13566 
13567   SDValue SrcVec = Ext1.getOperand(0);
13568   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
13569     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
13570   return DAG.getNode(NodeType, dl, MVT::v2f64,
13571                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
13572 }
13573 
13574 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
13575                                               DAGCombinerInfo &DCI) const {
13576   assert((N->getOpcode() == ISD::SINT_TO_FP ||
13577           N->getOpcode() == ISD::UINT_TO_FP) &&
13578          "Need an int -> FP conversion node here");
13579 
13580   if (useSoftFloat() || !Subtarget.has64BitSupport())
13581     return SDValue();
13582 
13583   SelectionDAG &DAG = DCI.DAG;
13584   SDLoc dl(N);
13585   SDValue Op(N, 0);
13586 
13587   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
13588   // from the hardware.
13589   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
13590     return SDValue();
13591   if (!Op.getOperand(0).getValueType().isSimple())
13592     return SDValue();
13593   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
13594       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
13595     return SDValue();
13596 
13597   SDValue FirstOperand(Op.getOperand(0));
13598   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
13599     (FirstOperand.getValueType() == MVT::i8 ||
13600      FirstOperand.getValueType() == MVT::i16);
13601   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
13602     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
13603     bool DstDouble = Op.getValueType() == MVT::f64;
13604     unsigned ConvOp = Signed ?
13605       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
13606       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
13607     SDValue WidthConst =
13608       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
13609                             dl, false);
13610     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
13611     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
13612     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
13613                                          DAG.getVTList(MVT::f64, MVT::Other),
13614                                          Ops, MVT::i8, LDN->getMemOperand());
13615 
13616     // For signed conversion, we need to sign-extend the value in the VSR
13617     if (Signed) {
13618       SDValue ExtOps[] = { Ld, WidthConst };
13619       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
13620       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
13621     } else
13622       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
13623   }
13624 
13625 
13626   // For i32 intermediate values, unfortunately, the conversion functions
13627   // leave the upper 32 bits of the value are undefined. Within the set of
13628   // scalar instructions, we have no method for zero- or sign-extending the
13629   // value. Thus, we cannot handle i32 intermediate values here.
13630   if (Op.getOperand(0).getValueType() == MVT::i32)
13631     return SDValue();
13632 
13633   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
13634          "UINT_TO_FP is supported only with FPCVT");
13635 
13636   // If we have FCFIDS, then use it when converting to single-precision.
13637   // Otherwise, convert to double-precision and then round.
13638   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13639                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
13640                                                             : PPCISD::FCFIDS)
13641                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
13642                                                             : PPCISD::FCFID);
13643   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13644                   ? MVT::f32
13645                   : MVT::f64;
13646 
13647   // If we're converting from a float, to an int, and back to a float again,
13648   // then we don't need the store/load pair at all.
13649   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
13650        Subtarget.hasFPCVT()) ||
13651       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
13652     SDValue Src = Op.getOperand(0).getOperand(0);
13653     if (Src.getValueType() == MVT::f32) {
13654       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
13655       DCI.AddToWorklist(Src.getNode());
13656     } else if (Src.getValueType() != MVT::f64) {
13657       // Make sure that we don't pick up a ppc_fp128 source value.
13658       return SDValue();
13659     }
13660 
13661     unsigned FCTOp =
13662       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
13663                                                         PPCISD::FCTIDUZ;
13664 
13665     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
13666     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
13667 
13668     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
13669       FP = DAG.getNode(ISD::FP_ROUND, dl,
13670                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
13671       DCI.AddToWorklist(FP.getNode());
13672     }
13673 
13674     return FP;
13675   }
13676 
13677   return SDValue();
13678 }
13679 
13680 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
13681 // builtins) into loads with swaps.
13682 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
13683                                               DAGCombinerInfo &DCI) const {
13684   SelectionDAG &DAG = DCI.DAG;
13685   SDLoc dl(N);
13686   SDValue Chain;
13687   SDValue Base;
13688   MachineMemOperand *MMO;
13689 
13690   switch (N->getOpcode()) {
13691   default:
13692     llvm_unreachable("Unexpected opcode for little endian VSX load");
13693   case ISD::LOAD: {
13694     LoadSDNode *LD = cast<LoadSDNode>(N);
13695     Chain = LD->getChain();
13696     Base = LD->getBasePtr();
13697     MMO = LD->getMemOperand();
13698     // If the MMO suggests this isn't a load of a full vector, leave
13699     // things alone.  For a built-in, we have to make the change for
13700     // correctness, so if there is a size problem that will be a bug.
13701     if (MMO->getSize() < 16)
13702       return SDValue();
13703     break;
13704   }
13705   case ISD::INTRINSIC_W_CHAIN: {
13706     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13707     Chain = Intrin->getChain();
13708     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13709     // us what we want. Get operand 2 instead.
13710     Base = Intrin->getOperand(2);
13711     MMO = Intrin->getMemOperand();
13712     break;
13713   }
13714   }
13715 
13716   MVT VecTy = N->getValueType(0).getSimpleVT();
13717 
13718   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13719   // aligned and the type is a vector with elements up to 4 bytes
13720   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13721       VecTy.getScalarSizeInBits() <= 32) {
13722     return SDValue();
13723   }
13724 
13725   SDValue LoadOps[] = { Chain, Base };
13726   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13727                                          DAG.getVTList(MVT::v2f64, MVT::Other),
13728                                          LoadOps, MVT::v2f64, MMO);
13729 
13730   DCI.AddToWorklist(Load.getNode());
13731   Chain = Load.getValue(1);
13732   SDValue Swap = DAG.getNode(
13733       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13734   DCI.AddToWorklist(Swap.getNode());
13735 
13736   // Add a bitcast if the resulting load type doesn't match v2f64.
13737   if (VecTy != MVT::v2f64) {
13738     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13739     DCI.AddToWorklist(N.getNode());
13740     // Package {bitcast value, swap's chain} to match Load's shape.
13741     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13742                        N, Swap.getValue(1));
13743   }
13744 
13745   return Swap;
13746 }
13747 
13748 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13749 // builtins) into stores with swaps.
13750 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13751                                                DAGCombinerInfo &DCI) const {
13752   SelectionDAG &DAG = DCI.DAG;
13753   SDLoc dl(N);
13754   SDValue Chain;
13755   SDValue Base;
13756   unsigned SrcOpnd;
13757   MachineMemOperand *MMO;
13758 
13759   switch (N->getOpcode()) {
13760   default:
13761     llvm_unreachable("Unexpected opcode for little endian VSX store");
13762   case ISD::STORE: {
13763     StoreSDNode *ST = cast<StoreSDNode>(N);
13764     Chain = ST->getChain();
13765     Base = ST->getBasePtr();
13766     MMO = ST->getMemOperand();
13767     SrcOpnd = 1;
13768     // If the MMO suggests this isn't a store of a full vector, leave
13769     // things alone.  For a built-in, we have to make the change for
13770     // correctness, so if there is a size problem that will be a bug.
13771     if (MMO->getSize() < 16)
13772       return SDValue();
13773     break;
13774   }
13775   case ISD::INTRINSIC_VOID: {
13776     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13777     Chain = Intrin->getChain();
13778     // Intrin->getBasePtr() oddly does not get what we want.
13779     Base = Intrin->getOperand(3);
13780     MMO = Intrin->getMemOperand();
13781     SrcOpnd = 2;
13782     break;
13783   }
13784   }
13785 
13786   SDValue Src = N->getOperand(SrcOpnd);
13787   MVT VecTy = Src.getValueType().getSimpleVT();
13788 
13789   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
13790   // aligned and the type is a vector with elements up to 4 bytes
13791   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13792       VecTy.getScalarSizeInBits() <= 32) {
13793     return SDValue();
13794   }
13795 
13796   // All stores are done as v2f64 and possible bit cast.
13797   if (VecTy != MVT::v2f64) {
13798     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13799     DCI.AddToWorklist(Src.getNode());
13800   }
13801 
13802   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13803                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13804   DCI.AddToWorklist(Swap.getNode());
13805   Chain = Swap.getValue(1);
13806   SDValue StoreOps[] = { Chain, Swap, Base };
13807   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13808                                           DAG.getVTList(MVT::Other),
13809                                           StoreOps, VecTy, MMO);
13810   DCI.AddToWorklist(Store.getNode());
13811   return Store;
13812 }
13813 
13814 // Handle DAG combine for STORE (FP_TO_INT F).
13815 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13816                                                DAGCombinerInfo &DCI) const {
13817 
13818   SelectionDAG &DAG = DCI.DAG;
13819   SDLoc dl(N);
13820   unsigned Opcode = N->getOperand(1).getOpcode();
13821 
13822   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13823          && "Not a FP_TO_INT Instruction!");
13824 
13825   SDValue Val = N->getOperand(1).getOperand(0);
13826   EVT Op1VT = N->getOperand(1).getValueType();
13827   EVT ResVT = Val.getValueType();
13828 
13829   if (!isTypeLegal(ResVT))
13830     return SDValue();
13831 
13832   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13833   bool ValidTypeForStoreFltAsInt =
13834         (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13835          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13836 
13837   if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
13838       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13839     return SDValue();
13840 
13841   // Extend f32 values to f64
13842   if (ResVT.getScalarSizeInBits() == 32) {
13843     Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13844     DCI.AddToWorklist(Val.getNode());
13845   }
13846 
13847   // Set signed or unsigned conversion opcode.
13848   unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13849                           PPCISD::FP_TO_SINT_IN_VSR :
13850                           PPCISD::FP_TO_UINT_IN_VSR;
13851 
13852   Val = DAG.getNode(ConvOpcode,
13853                     dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13854   DCI.AddToWorklist(Val.getNode());
13855 
13856   // Set number of bytes being converted.
13857   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13858   SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13859                     DAG.getIntPtrConstant(ByteSize, dl, false),
13860                     DAG.getValueType(Op1VT) };
13861 
13862   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13863           DAG.getVTList(MVT::Other), Ops,
13864           cast<StoreSDNode>(N)->getMemoryVT(),
13865           cast<StoreSDNode>(N)->getMemOperand());
13866 
13867   DCI.AddToWorklist(Val.getNode());
13868   return Val;
13869 }
13870 
13871 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
13872   // Check that the source of the element keeps flipping
13873   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
13874   bool PrevElemFromFirstVec = Mask[0] < NumElts;
13875   for (int i = 1, e = Mask.size(); i < e; i++) {
13876     if (PrevElemFromFirstVec && Mask[i] < NumElts)
13877       return false;
13878     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
13879       return false;
13880     PrevElemFromFirstVec = !PrevElemFromFirstVec;
13881   }
13882   return true;
13883 }
13884 
13885 static bool isSplatBV(SDValue Op) {
13886   if (Op.getOpcode() != ISD::BUILD_VECTOR)
13887     return false;
13888   SDValue FirstOp;
13889 
13890   // Find first non-undef input.
13891   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
13892     FirstOp = Op.getOperand(i);
13893     if (!FirstOp.isUndef())
13894       break;
13895   }
13896 
13897   // All inputs are undef or the same as the first non-undef input.
13898   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
13899     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
13900       return false;
13901   return true;
13902 }
13903 
13904 static SDValue isScalarToVec(SDValue Op) {
13905   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13906     return Op;
13907   if (Op.getOpcode() != ISD::BITCAST)
13908     return SDValue();
13909   Op = Op.getOperand(0);
13910   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13911     return Op;
13912   return SDValue();
13913 }
13914 
13915 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
13916                                             int LHSMaxIdx, int RHSMinIdx,
13917                                             int RHSMaxIdx, int HalfVec) {
13918   for (int i = 0, e = ShuffV.size(); i < e; i++) {
13919     int Idx = ShuffV[i];
13920     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
13921       ShuffV[i] += HalfVec;
13922   }
13923 }
13924 
13925 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
13926 // the original is:
13927 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
13928 // In such a case, just change the shuffle mask to extract the element
13929 // from the permuted index.
13930 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
13931   SDLoc dl(OrigSToV);
13932   EVT VT = OrigSToV.getValueType();
13933   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13934          "Expecting a SCALAR_TO_VECTOR here");
13935   SDValue Input = OrigSToV.getOperand(0);
13936 
13937   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13938     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
13939     SDValue OrigVector = Input.getOperand(0);
13940 
13941     // Can't handle non-const element indices or different vector types
13942     // for the input to the extract and the output of the scalar_to_vector.
13943     if (Idx && VT == OrigVector.getValueType()) {
13944       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
13945       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
13946       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
13947     }
13948   }
13949   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
13950                      OrigSToV.getOperand(0));
13951 }
13952 
13953 // On little endian subtargets, combine shuffles such as:
13954 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
13955 // into:
13956 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
13957 // because the latter can be matched to a single instruction merge.
13958 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
13959 // to put the value into element zero. Adjust the shuffle mask so that the
13960 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
13961 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
13962                                                 SelectionDAG &DAG) const {
13963   SDValue LHS = SVN->getOperand(0);
13964   SDValue RHS = SVN->getOperand(1);
13965   auto Mask = SVN->getMask();
13966   int NumElts = LHS.getValueType().getVectorNumElements();
13967   SDValue Res(SVN, 0);
13968   SDLoc dl(SVN);
13969 
13970   // None of these combines are useful on big endian systems since the ISA
13971   // already has a big endian bias.
13972   if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
13973     return Res;
13974 
13975   // If this is not a shuffle of a shuffle and the first element comes from
13976   // the second vector, canonicalize to the commuted form. This will make it
13977   // more likely to match one of the single instruction patterns.
13978   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
13979       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
13980     std::swap(LHS, RHS);
13981     Res = DAG.getCommutedVectorShuffle(*SVN);
13982     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
13983   }
13984 
13985   // Adjust the shuffle mask if either input vector comes from a
13986   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
13987   // form (to prevent the need for a swap).
13988   SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
13989   SDValue SToVLHS = isScalarToVec(LHS);
13990   SDValue SToVRHS = isScalarToVec(RHS);
13991   if (SToVLHS || SToVRHS) {
13992     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
13993                             : SToVRHS.getValueType().getVectorNumElements();
13994     int NumEltsOut = ShuffV.size();
13995 
13996     // Initially assume that neither input is permuted. These will be adjusted
13997     // accordingly if either input is.
13998     int LHSMaxIdx = -1;
13999     int RHSMinIdx = -1;
14000     int RHSMaxIdx = -1;
14001     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14002 
14003     // Get the permuted scalar to vector nodes for the source(s) that come from
14004     // ISD::SCALAR_TO_VECTOR.
14005     if (SToVLHS) {
14006       // Set up the values for the shuffle vector fixup.
14007       LHSMaxIdx = NumEltsOut / NumEltsIn;
14008       SToVLHS = getSToVPermuted(SToVLHS, DAG);
14009       if (SToVLHS.getValueType() != LHS.getValueType())
14010         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14011       LHS = SToVLHS;
14012     }
14013     if (SToVRHS) {
14014       RHSMinIdx = NumEltsOut;
14015       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14016       SToVRHS = getSToVPermuted(SToVRHS, DAG);
14017       if (SToVRHS.getValueType() != RHS.getValueType())
14018         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14019       RHS = SToVRHS;
14020     }
14021 
14022     // Fix up the shuffle mask to reflect where the desired element actually is.
14023     // The minimum and maximum indices that correspond to element zero for both
14024     // the LHS and RHS are computed and will control which shuffle mask entries
14025     // are to be changed. For example, if the RHS is permuted, any shuffle mask
14026     // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14027     // HalfVec to refer to the corresponding element in the permuted vector.
14028     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14029                                     HalfVec);
14030     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14031 
14032     // We may have simplified away the shuffle. We won't be able to do anything
14033     // further with it here.
14034     if (!isa<ShuffleVectorSDNode>(Res))
14035       return Res;
14036     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14037   }
14038 
14039   // The common case after we commuted the shuffle is that the RHS is a splat
14040   // and we have elements coming in from the splat at indices that are not
14041   // conducive to using a merge.
14042   // Example:
14043   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14044   if (!isSplatBV(RHS))
14045     return Res;
14046 
14047   // We are looking for a mask such that all even elements are from
14048   // one vector and all odd elements from the other.
14049   if (!isAlternatingShuffMask(Mask, NumElts))
14050     return Res;
14051 
14052   // Adjust the mask so we are pulling in the same index from the splat
14053   // as the index from the interesting vector in consecutive elements.
14054   // Example (even elements from first vector):
14055   // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14056   if (Mask[0] < NumElts)
14057     for (int i = 1, e = Mask.size(); i < e; i += 2)
14058       ShuffV[i] = (ShuffV[i - 1] + NumElts);
14059   // Example (odd elements from first vector):
14060   // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14061   else
14062     for (int i = 0, e = Mask.size(); i < e; i += 2)
14063       ShuffV[i] = (ShuffV[i + 1] + NumElts);
14064 
14065   // If the RHS has undefs, we need to remove them since we may have created
14066   // a shuffle that adds those instead of the splat value.
14067   SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14068   RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14069 
14070   Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14071   return Res;
14072 }
14073 
14074 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14075                                                 LSBaseSDNode *LSBase,
14076                                                 DAGCombinerInfo &DCI) const {
14077   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14078         "Not a reverse memop pattern!");
14079 
14080   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14081     auto Mask = SVN->getMask();
14082     int i = 0;
14083     auto I = Mask.rbegin();
14084     auto E = Mask.rend();
14085 
14086     for (; I != E; ++I) {
14087       if (*I != i)
14088         return false;
14089       i++;
14090     }
14091     return true;
14092   };
14093 
14094   SelectionDAG &DAG = DCI.DAG;
14095   EVT VT = SVN->getValueType(0);
14096 
14097   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14098     return SDValue();
14099 
14100   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14101   // See comment in PPCVSXSwapRemoval.cpp.
14102   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14103   if (!Subtarget.hasP9Vector())
14104     return SDValue();
14105 
14106   if(!IsElementReverse(SVN))
14107     return SDValue();
14108 
14109   if (LSBase->getOpcode() == ISD::LOAD) {
14110     SDLoc dl(SVN);
14111     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14112     return DAG.getMemIntrinsicNode(
14113         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14114         LSBase->getMemoryVT(), LSBase->getMemOperand());
14115   }
14116 
14117   if (LSBase->getOpcode() == ISD::STORE) {
14118     SDLoc dl(LSBase);
14119     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14120                           LSBase->getBasePtr()};
14121     return DAG.getMemIntrinsicNode(
14122         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14123         LSBase->getMemoryVT(), LSBase->getMemOperand());
14124   }
14125 
14126   llvm_unreachable("Expected a load or store node here");
14127 }
14128 
14129 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14130                                              DAGCombinerInfo &DCI) const {
14131   SelectionDAG &DAG = DCI.DAG;
14132   SDLoc dl(N);
14133   switch (N->getOpcode()) {
14134   default: break;
14135   case ISD::ADD:
14136     return combineADD(N, DCI);
14137   case ISD::SHL:
14138     return combineSHL(N, DCI);
14139   case ISD::SRA:
14140     return combineSRA(N, DCI);
14141   case ISD::SRL:
14142     return combineSRL(N, DCI);
14143   case ISD::MUL:
14144     return combineMUL(N, DCI);
14145   case ISD::FMA:
14146   case PPCISD::FNMSUB:
14147     return combineFMALike(N, DCI);
14148   case PPCISD::SHL:
14149     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14150         return N->getOperand(0);
14151     break;
14152   case PPCISD::SRL:
14153     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14154         return N->getOperand(0);
14155     break;
14156   case PPCISD::SRA:
14157     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14158       if (C->isNullValue() ||   //  0 >>s V -> 0.
14159           C->isAllOnesValue())    // -1 >>s V -> -1.
14160         return N->getOperand(0);
14161     }
14162     break;
14163   case ISD::SIGN_EXTEND:
14164   case ISD::ZERO_EXTEND:
14165   case ISD::ANY_EXTEND:
14166     return DAGCombineExtBoolTrunc(N, DCI);
14167   case ISD::TRUNCATE:
14168     return combineTRUNCATE(N, DCI);
14169   case ISD::SETCC:
14170     if (SDValue CSCC = combineSetCC(N, DCI))
14171       return CSCC;
14172     LLVM_FALLTHROUGH;
14173   case ISD::SELECT_CC:
14174     return DAGCombineTruncBoolExt(N, DCI);
14175   case ISD::SINT_TO_FP:
14176   case ISD::UINT_TO_FP:
14177     return combineFPToIntToFP(N, DCI);
14178   case ISD::VECTOR_SHUFFLE:
14179     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14180       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14181       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14182     }
14183     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14184   case ISD::STORE: {
14185 
14186     EVT Op1VT = N->getOperand(1).getValueType();
14187     unsigned Opcode = N->getOperand(1).getOpcode();
14188 
14189     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14190       SDValue Val= combineStoreFPToInt(N, DCI);
14191       if (Val)
14192         return Val;
14193     }
14194 
14195     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14196       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14197       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14198       if (Val)
14199         return Val;
14200     }
14201 
14202     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14203     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14204         N->getOperand(1).getNode()->hasOneUse() &&
14205         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14206          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14207 
14208       // STBRX can only handle simple types and it makes no sense to store less
14209       // two bytes in byte-reversed order.
14210       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14211       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14212         break;
14213 
14214       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14215       // Do an any-extend to 32-bits if this is a half-word input.
14216       if (BSwapOp.getValueType() == MVT::i16)
14217         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14218 
14219       // If the type of BSWAP operand is wider than stored memory width
14220       // it need to be shifted to the right side before STBRX.
14221       if (Op1VT.bitsGT(mVT)) {
14222         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14223         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14224                               DAG.getConstant(Shift, dl, MVT::i32));
14225         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14226         if (Op1VT == MVT::i64)
14227           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14228       }
14229 
14230       SDValue Ops[] = {
14231         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14232       };
14233       return
14234         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14235                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14236                                 cast<StoreSDNode>(N)->getMemOperand());
14237     }
14238 
14239     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14240     // So it can increase the chance of CSE constant construction.
14241     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14242         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14243       // Need to sign-extended to 64-bits to handle negative values.
14244       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14245       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14246                                     MemVT.getSizeInBits());
14247       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14248 
14249       // DAG.getTruncStore() can't be used here because it doesn't accept
14250       // the general (base + offset) addressing mode.
14251       // So we use UpdateNodeOperands and setTruncatingStore instead.
14252       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14253                              N->getOperand(3));
14254       cast<StoreSDNode>(N)->setTruncatingStore(true);
14255       return SDValue(N, 0);
14256     }
14257 
14258     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14259     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14260     if (Op1VT.isSimple()) {
14261       MVT StoreVT = Op1VT.getSimpleVT();
14262       if (Subtarget.needsSwapsForVSXMemOps() &&
14263           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14264            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14265         return expandVSXStoreForLE(N, DCI);
14266     }
14267     break;
14268   }
14269   case ISD::LOAD: {
14270     LoadSDNode *LD = cast<LoadSDNode>(N);
14271     EVT VT = LD->getValueType(0);
14272 
14273     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14274     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14275     if (VT.isSimple()) {
14276       MVT LoadVT = VT.getSimpleVT();
14277       if (Subtarget.needsSwapsForVSXMemOps() &&
14278           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14279            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14280         return expandVSXLoadForLE(N, DCI);
14281     }
14282 
14283     // We sometimes end up with a 64-bit integer load, from which we extract
14284     // two single-precision floating-point numbers. This happens with
14285     // std::complex<float>, and other similar structures, because of the way we
14286     // canonicalize structure copies. However, if we lack direct moves,
14287     // then the final bitcasts from the extracted integer values to the
14288     // floating-point numbers turn into store/load pairs. Even with direct moves,
14289     // just loading the two floating-point numbers is likely better.
14290     auto ReplaceTwoFloatLoad = [&]() {
14291       if (VT != MVT::i64)
14292         return false;
14293 
14294       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14295           LD->isVolatile())
14296         return false;
14297 
14298       //  We're looking for a sequence like this:
14299       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14300       //      t16: i64 = srl t13, Constant:i32<32>
14301       //    t17: i32 = truncate t16
14302       //  t18: f32 = bitcast t17
14303       //    t19: i32 = truncate t13
14304       //  t20: f32 = bitcast t19
14305 
14306       if (!LD->hasNUsesOfValue(2, 0))
14307         return false;
14308 
14309       auto UI = LD->use_begin();
14310       while (UI.getUse().getResNo() != 0) ++UI;
14311       SDNode *Trunc = *UI++;
14312       while (UI.getUse().getResNo() != 0) ++UI;
14313       SDNode *RightShift = *UI;
14314       if (Trunc->getOpcode() != ISD::TRUNCATE)
14315         std::swap(Trunc, RightShift);
14316 
14317       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14318           Trunc->getValueType(0) != MVT::i32 ||
14319           !Trunc->hasOneUse())
14320         return false;
14321       if (RightShift->getOpcode() != ISD::SRL ||
14322           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14323           RightShift->getConstantOperandVal(1) != 32 ||
14324           !RightShift->hasOneUse())
14325         return false;
14326 
14327       SDNode *Trunc2 = *RightShift->use_begin();
14328       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14329           Trunc2->getValueType(0) != MVT::i32 ||
14330           !Trunc2->hasOneUse())
14331         return false;
14332 
14333       SDNode *Bitcast = *Trunc->use_begin();
14334       SDNode *Bitcast2 = *Trunc2->use_begin();
14335 
14336       if (Bitcast->getOpcode() != ISD::BITCAST ||
14337           Bitcast->getValueType(0) != MVT::f32)
14338         return false;
14339       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14340           Bitcast2->getValueType(0) != MVT::f32)
14341         return false;
14342 
14343       if (Subtarget.isLittleEndian())
14344         std::swap(Bitcast, Bitcast2);
14345 
14346       // Bitcast has the second float (in memory-layout order) and Bitcast2
14347       // has the first one.
14348 
14349       SDValue BasePtr = LD->getBasePtr();
14350       if (LD->isIndexed()) {
14351         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14352                "Non-pre-inc AM on PPC?");
14353         BasePtr =
14354           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14355                       LD->getOffset());
14356       }
14357 
14358       auto MMOFlags =
14359           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14360       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14361                                       LD->getPointerInfo(), LD->getAlignment(),
14362                                       MMOFlags, LD->getAAInfo());
14363       SDValue AddPtr =
14364         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14365                     BasePtr, DAG.getIntPtrConstant(4, dl));
14366       SDValue FloatLoad2 = DAG.getLoad(
14367           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14368           LD->getPointerInfo().getWithOffset(4),
14369           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14370 
14371       if (LD->isIndexed()) {
14372         // Note that DAGCombine should re-form any pre-increment load(s) from
14373         // what is produced here if that makes sense.
14374         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14375       }
14376 
14377       DCI.CombineTo(Bitcast2, FloatLoad);
14378       DCI.CombineTo(Bitcast, FloatLoad2);
14379 
14380       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14381                                     SDValue(FloatLoad2.getNode(), 1));
14382       return true;
14383     };
14384 
14385     if (ReplaceTwoFloatLoad())
14386       return SDValue(N, 0);
14387 
14388     EVT MemVT = LD->getMemoryVT();
14389     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14390     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14391     if (LD->isUnindexed() && VT.isVector() &&
14392         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14393           // P8 and later hardware should just use LOAD.
14394           !Subtarget.hasP8Vector() &&
14395           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14396            VT == MVT::v4f32))) &&
14397         LD->getAlign() < ABIAlignment) {
14398       // This is a type-legal unaligned Altivec load.
14399       SDValue Chain = LD->getChain();
14400       SDValue Ptr = LD->getBasePtr();
14401       bool isLittleEndian = Subtarget.isLittleEndian();
14402 
14403       // This implements the loading of unaligned vectors as described in
14404       // the venerable Apple Velocity Engine overview. Specifically:
14405       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14406       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14407       //
14408       // The general idea is to expand a sequence of one or more unaligned
14409       // loads into an alignment-based permutation-control instruction (lvsl
14410       // or lvsr), a series of regular vector loads (which always truncate
14411       // their input address to an aligned address), and a series of
14412       // permutations.  The results of these permutations are the requested
14413       // loaded values.  The trick is that the last "extra" load is not taken
14414       // from the address you might suspect (sizeof(vector) bytes after the
14415       // last requested load), but rather sizeof(vector) - 1 bytes after the
14416       // last requested vector. The point of this is to avoid a page fault if
14417       // the base address happened to be aligned. This works because if the
14418       // base address is aligned, then adding less than a full vector length
14419       // will cause the last vector in the sequence to be (re)loaded.
14420       // Otherwise, the next vector will be fetched as you might suspect was
14421       // necessary.
14422 
14423       // We might be able to reuse the permutation generation from
14424       // a different base address offset from this one by an aligned amount.
14425       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14426       // optimization later.
14427       Intrinsic::ID Intr, IntrLD, IntrPerm;
14428       MVT PermCntlTy, PermTy, LDTy;
14429       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14430                             : Intrinsic::ppc_altivec_lvsl;
14431       IntrLD = Intrinsic::ppc_altivec_lvx;
14432       IntrPerm = Intrinsic::ppc_altivec_vperm;
14433       PermCntlTy = MVT::v16i8;
14434       PermTy = MVT::v4i32;
14435       LDTy = MVT::v4i32;
14436 
14437       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14438 
14439       // Create the new MMO for the new base load. It is like the original MMO,
14440       // but represents an area in memory almost twice the vector size centered
14441       // on the original address. If the address is unaligned, we might start
14442       // reading up to (sizeof(vector)-1) bytes below the address of the
14443       // original unaligned load.
14444       MachineFunction &MF = DAG.getMachineFunction();
14445       MachineMemOperand *BaseMMO =
14446         MF.getMachineMemOperand(LD->getMemOperand(),
14447                                 -(long)MemVT.getStoreSize()+1,
14448                                 2*MemVT.getStoreSize()-1);
14449 
14450       // Create the new base load.
14451       SDValue LDXIntID =
14452           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14453       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14454       SDValue BaseLoad =
14455         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14456                                 DAG.getVTList(PermTy, MVT::Other),
14457                                 BaseLoadOps, LDTy, BaseMMO);
14458 
14459       // Note that the value of IncOffset (which is provided to the next
14460       // load's pointer info offset value, and thus used to calculate the
14461       // alignment), and the value of IncValue (which is actually used to
14462       // increment the pointer value) are different! This is because we
14463       // require the next load to appear to be aligned, even though it
14464       // is actually offset from the base pointer by a lesser amount.
14465       int IncOffset = VT.getSizeInBits() / 8;
14466       int IncValue = IncOffset;
14467 
14468       // Walk (both up and down) the chain looking for another load at the real
14469       // (aligned) offset (the alignment of the other load does not matter in
14470       // this case). If found, then do not use the offset reduction trick, as
14471       // that will prevent the loads from being later combined (as they would
14472       // otherwise be duplicates).
14473       if (!findConsecutiveLoad(LD, DAG))
14474         --IncValue;
14475 
14476       SDValue Increment =
14477           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14478       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14479 
14480       MachineMemOperand *ExtraMMO =
14481         MF.getMachineMemOperand(LD->getMemOperand(),
14482                                 1, 2*MemVT.getStoreSize()-1);
14483       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14484       SDValue ExtraLoad =
14485         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14486                                 DAG.getVTList(PermTy, MVT::Other),
14487                                 ExtraLoadOps, LDTy, ExtraMMO);
14488 
14489       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14490         BaseLoad.getValue(1), ExtraLoad.getValue(1));
14491 
14492       // Because vperm has a big-endian bias, we must reverse the order
14493       // of the input vectors and complement the permute control vector
14494       // when generating little endian code.  We have already handled the
14495       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14496       // and ExtraLoad here.
14497       SDValue Perm;
14498       if (isLittleEndian)
14499         Perm = BuildIntrinsicOp(IntrPerm,
14500                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14501       else
14502         Perm = BuildIntrinsicOp(IntrPerm,
14503                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14504 
14505       if (VT != PermTy)
14506         Perm = Subtarget.hasAltivec()
14507                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14508                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14509                                  DAG.getTargetConstant(1, dl, MVT::i64));
14510                                // second argument is 1 because this rounding
14511                                // is always exact.
14512 
14513       // The output of the permutation is our loaded result, the TokenFactor is
14514       // our new chain.
14515       DCI.CombineTo(N, Perm, TF);
14516       return SDValue(N, 0);
14517     }
14518     }
14519     break;
14520     case ISD::INTRINSIC_WO_CHAIN: {
14521       bool isLittleEndian = Subtarget.isLittleEndian();
14522       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14523       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14524                                            : Intrinsic::ppc_altivec_lvsl);
14525       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14526         SDValue Add = N->getOperand(1);
14527 
14528         int Bits = 4 /* 16 byte alignment */;
14529 
14530         if (DAG.MaskedValueIsZero(Add->getOperand(1),
14531                                   APInt::getAllOnesValue(Bits /* alignment */)
14532                                       .zext(Add.getScalarValueSizeInBits()))) {
14533           SDNode *BasePtr = Add->getOperand(0).getNode();
14534           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14535                                     UE = BasePtr->use_end();
14536                UI != UE; ++UI) {
14537             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14538                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14539                     IID) {
14540               // We've found another LVSL/LVSR, and this address is an aligned
14541               // multiple of that one. The results will be the same, so use the
14542               // one we've just found instead.
14543 
14544               return SDValue(*UI, 0);
14545             }
14546           }
14547         }
14548 
14549         if (isa<ConstantSDNode>(Add->getOperand(1))) {
14550           SDNode *BasePtr = Add->getOperand(0).getNode();
14551           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14552                UE = BasePtr->use_end(); UI != UE; ++UI) {
14553             if (UI->getOpcode() == ISD::ADD &&
14554                 isa<ConstantSDNode>(UI->getOperand(1)) &&
14555                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14556                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14557                 (1ULL << Bits) == 0) {
14558               SDNode *OtherAdd = *UI;
14559               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14560                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
14561                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14562                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14563                   return SDValue(*VI, 0);
14564                 }
14565               }
14566             }
14567           }
14568         }
14569       }
14570 
14571       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14572       // Expose the vabsduw/h/b opportunity for down stream
14573       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14574           (IID == Intrinsic::ppc_altivec_vmaxsw ||
14575            IID == Intrinsic::ppc_altivec_vmaxsh ||
14576            IID == Intrinsic::ppc_altivec_vmaxsb)) {
14577         SDValue V1 = N->getOperand(1);
14578         SDValue V2 = N->getOperand(2);
14579         if ((V1.getSimpleValueType() == MVT::v4i32 ||
14580              V1.getSimpleValueType() == MVT::v8i16 ||
14581              V1.getSimpleValueType() == MVT::v16i8) &&
14582             V1.getSimpleValueType() == V2.getSimpleValueType()) {
14583           // (0-a, a)
14584           if (V1.getOpcode() == ISD::SUB &&
14585               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14586               V1.getOperand(1) == V2) {
14587             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14588           }
14589           // (a, 0-a)
14590           if (V2.getOpcode() == ISD::SUB &&
14591               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14592               V2.getOperand(1) == V1) {
14593             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14594           }
14595           // (x-y, y-x)
14596           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14597               V1.getOperand(0) == V2.getOperand(1) &&
14598               V1.getOperand(1) == V2.getOperand(0)) {
14599             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14600           }
14601         }
14602       }
14603     }
14604 
14605     break;
14606   case ISD::INTRINSIC_W_CHAIN:
14607     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14608     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14609     if (Subtarget.needsSwapsForVSXMemOps()) {
14610       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14611       default:
14612         break;
14613       case Intrinsic::ppc_vsx_lxvw4x:
14614       case Intrinsic::ppc_vsx_lxvd2x:
14615         return expandVSXLoadForLE(N, DCI);
14616       }
14617     }
14618     break;
14619   case ISD::INTRINSIC_VOID:
14620     // For little endian, VSX stores require generating xxswapd/stxvd2x.
14621     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14622     if (Subtarget.needsSwapsForVSXMemOps()) {
14623       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14624       default:
14625         break;
14626       case Intrinsic::ppc_vsx_stxvw4x:
14627       case Intrinsic::ppc_vsx_stxvd2x:
14628         return expandVSXStoreForLE(N, DCI);
14629       }
14630     }
14631     break;
14632   case ISD::BSWAP:
14633     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14634     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14635         N->getOperand(0).hasOneUse() &&
14636         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14637          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14638           N->getValueType(0) == MVT::i64))) {
14639       SDValue Load = N->getOperand(0);
14640       LoadSDNode *LD = cast<LoadSDNode>(Load);
14641       // Create the byte-swapping load.
14642       SDValue Ops[] = {
14643         LD->getChain(),    // Chain
14644         LD->getBasePtr(),  // Ptr
14645         DAG.getValueType(N->getValueType(0)) // VT
14646       };
14647       SDValue BSLoad =
14648         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
14649                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14650                                               MVT::i64 : MVT::i32, MVT::Other),
14651                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
14652 
14653       // If this is an i16 load, insert the truncate.
14654       SDValue ResVal = BSLoad;
14655       if (N->getValueType(0) == MVT::i16)
14656         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
14657 
14658       // First, combine the bswap away.  This makes the value produced by the
14659       // load dead.
14660       DCI.CombineTo(N, ResVal);
14661 
14662       // Next, combine the load away, we give it a bogus result value but a real
14663       // chain result.  The result value is dead because the bswap is dead.
14664       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14665 
14666       // Return N so it doesn't get rechecked!
14667       return SDValue(N, 0);
14668     }
14669     break;
14670   case PPCISD::VCMP:
14671     // If a VCMP_rec node already exists with exactly the same operands as this
14672     // node, use its result instead of this node (VCMP_rec computes both a CR6
14673     // and a normal output).
14674     //
14675     if (!N->getOperand(0).hasOneUse() &&
14676         !N->getOperand(1).hasOneUse() &&
14677         !N->getOperand(2).hasOneUse()) {
14678 
14679       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
14680       SDNode *VCMPrecNode = nullptr;
14681 
14682       SDNode *LHSN = N->getOperand(0).getNode();
14683       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14684            UI != E; ++UI)
14685         if (UI->getOpcode() == PPCISD::VCMP_rec &&
14686             UI->getOperand(1) == N->getOperand(1) &&
14687             UI->getOperand(2) == N->getOperand(2) &&
14688             UI->getOperand(0) == N->getOperand(0)) {
14689           VCMPrecNode = *UI;
14690           break;
14691         }
14692 
14693       // If there is no VCMP_rec node, or if the flag value has a single use,
14694       // don't transform this.
14695       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
14696         break;
14697 
14698       // Look at the (necessarily single) use of the flag value.  If it has a
14699       // chain, this transformation is more complex.  Note that multiple things
14700       // could use the value result, which we should ignore.
14701       SDNode *FlagUser = nullptr;
14702       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
14703            FlagUser == nullptr; ++UI) {
14704         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
14705         SDNode *User = *UI;
14706         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14707           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
14708             FlagUser = User;
14709             break;
14710           }
14711         }
14712       }
14713 
14714       // If the user is a MFOCRF instruction, we know this is safe.
14715       // Otherwise we give up for right now.
14716       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14717         return SDValue(VCMPrecNode, 0);
14718     }
14719     break;
14720   case ISD::BRCOND: {
14721     SDValue Cond = N->getOperand(1);
14722     SDValue Target = N->getOperand(2);
14723 
14724     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14725         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
14726           Intrinsic::loop_decrement) {
14727 
14728       // We now need to make the intrinsic dead (it cannot be instruction
14729       // selected).
14730       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14731       assert(Cond.getNode()->hasOneUse() &&
14732              "Counter decrement has more than one use");
14733 
14734       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14735                          N->getOperand(0), Target);
14736     }
14737   }
14738   break;
14739   case ISD::BR_CC: {
14740     // If this is a branch on an altivec predicate comparison, lower this so
14741     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
14742     // lowering is done pre-legalize, because the legalizer lowers the predicate
14743     // compare down to code that is difficult to reassemble.
14744     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14745     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14746 
14747     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
14748     // value. If so, pass-through the AND to get to the intrinsic.
14749     if (LHS.getOpcode() == ISD::AND &&
14750         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14751         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
14752           Intrinsic::loop_decrement &&
14753         isa<ConstantSDNode>(LHS.getOperand(1)) &&
14754         !isNullConstant(LHS.getOperand(1)))
14755       LHS = LHS.getOperand(0);
14756 
14757     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14758         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
14759           Intrinsic::loop_decrement &&
14760         isa<ConstantSDNode>(RHS)) {
14761       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14762              "Counter decrement comparison is not EQ or NE");
14763 
14764       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14765       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14766                     (CC == ISD::SETNE && !Val);
14767 
14768       // We now need to make the intrinsic dead (it cannot be instruction
14769       // selected).
14770       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14771       assert(LHS.getNode()->hasOneUse() &&
14772              "Counter decrement has more than one use");
14773 
14774       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14775                          N->getOperand(0), N->getOperand(4));
14776     }
14777 
14778     int CompareOpc;
14779     bool isDot;
14780 
14781     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14782         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14783         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14784       assert(isDot && "Can't compare against a vector result!");
14785 
14786       // If this is a comparison against something other than 0/1, then we know
14787       // that the condition is never/always true.
14788       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14789       if (Val != 0 && Val != 1) {
14790         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
14791           return N->getOperand(0);
14792         // Always !=, turn it into an unconditional branch.
14793         return DAG.getNode(ISD::BR, dl, MVT::Other,
14794                            N->getOperand(0), N->getOperand(4));
14795       }
14796 
14797       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14798 
14799       // Create the PPCISD altivec 'dot' comparison node.
14800       SDValue Ops[] = {
14801         LHS.getOperand(2),  // LHS of compare
14802         LHS.getOperand(3),  // RHS of compare
14803         DAG.getConstant(CompareOpc, dl, MVT::i32)
14804       };
14805       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14806       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
14807 
14808       // Unpack the result based on how the target uses it.
14809       PPC::Predicate CompOpc;
14810       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14811       default:  // Can't happen, don't crash on invalid number though.
14812       case 0:   // Branch on the value of the EQ bit of CR6.
14813         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
14814         break;
14815       case 1:   // Branch on the inverted value of the EQ bit of CR6.
14816         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
14817         break;
14818       case 2:   // Branch on the value of the LT bit of CR6.
14819         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
14820         break;
14821       case 3:   // Branch on the inverted value of the LT bit of CR6.
14822         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
14823         break;
14824       }
14825 
14826       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14827                          DAG.getConstant(CompOpc, dl, MVT::i32),
14828                          DAG.getRegister(PPC::CR6, MVT::i32),
14829                          N->getOperand(4), CompNode.getValue(1));
14830     }
14831     break;
14832   }
14833   case ISD::BUILD_VECTOR:
14834     return DAGCombineBuildVector(N, DCI);
14835   case ISD::ABS:
14836     return combineABS(N, DCI);
14837   case ISD::VSELECT:
14838     return combineVSelect(N, DCI);
14839   }
14840 
14841   return SDValue();
14842 }
14843 
14844 SDValue
14845 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
14846                                  SelectionDAG &DAG,
14847                                  SmallVectorImpl<SDNode *> &Created) const {
14848   // fold (sdiv X, pow2)
14849   EVT VT = N->getValueType(0);
14850   if (VT == MVT::i64 && !Subtarget.isPPC64())
14851     return SDValue();
14852   if ((VT != MVT::i32 && VT != MVT::i64) ||
14853       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14854     return SDValue();
14855 
14856   SDLoc DL(N);
14857   SDValue N0 = N->getOperand(0);
14858 
14859   bool IsNegPow2 = (-Divisor).isPowerOf2();
14860   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14861   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14862 
14863   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14864   Created.push_back(Op.getNode());
14865 
14866   if (IsNegPow2) {
14867     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14868     Created.push_back(Op.getNode());
14869   }
14870 
14871   return Op;
14872 }
14873 
14874 //===----------------------------------------------------------------------===//
14875 // Inline Assembly Support
14876 //===----------------------------------------------------------------------===//
14877 
14878 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
14879                                                       KnownBits &Known,
14880                                                       const APInt &DemandedElts,
14881                                                       const SelectionDAG &DAG,
14882                                                       unsigned Depth) const {
14883   Known.resetAll();
14884   switch (Op.getOpcode()) {
14885   default: break;
14886   case PPCISD::LBRX: {
14887     // lhbrx is known to have the top bits cleared out.
14888     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14889       Known.Zero = 0xFFFF0000;
14890     break;
14891   }
14892   case ISD::INTRINSIC_WO_CHAIN: {
14893     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14894     default: break;
14895     case Intrinsic::ppc_altivec_vcmpbfp_p:
14896     case Intrinsic::ppc_altivec_vcmpeqfp_p:
14897     case Intrinsic::ppc_altivec_vcmpequb_p:
14898     case Intrinsic::ppc_altivec_vcmpequh_p:
14899     case Intrinsic::ppc_altivec_vcmpequw_p:
14900     case Intrinsic::ppc_altivec_vcmpequd_p:
14901     case Intrinsic::ppc_altivec_vcmpequq_p:
14902     case Intrinsic::ppc_altivec_vcmpgefp_p:
14903     case Intrinsic::ppc_altivec_vcmpgtfp_p:
14904     case Intrinsic::ppc_altivec_vcmpgtsb_p:
14905     case Intrinsic::ppc_altivec_vcmpgtsh_p:
14906     case Intrinsic::ppc_altivec_vcmpgtsw_p:
14907     case Intrinsic::ppc_altivec_vcmpgtsd_p:
14908     case Intrinsic::ppc_altivec_vcmpgtsq_p:
14909     case Intrinsic::ppc_altivec_vcmpgtub_p:
14910     case Intrinsic::ppc_altivec_vcmpgtuh_p:
14911     case Intrinsic::ppc_altivec_vcmpgtuw_p:
14912     case Intrinsic::ppc_altivec_vcmpgtud_p:
14913     case Intrinsic::ppc_altivec_vcmpgtuq_p:
14914       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
14915       break;
14916     }
14917   }
14918   }
14919 }
14920 
14921 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
14922   switch (Subtarget.getCPUDirective()) {
14923   default: break;
14924   case PPC::DIR_970:
14925   case PPC::DIR_PWR4:
14926   case PPC::DIR_PWR5:
14927   case PPC::DIR_PWR5X:
14928   case PPC::DIR_PWR6:
14929   case PPC::DIR_PWR6X:
14930   case PPC::DIR_PWR7:
14931   case PPC::DIR_PWR8:
14932   case PPC::DIR_PWR9:
14933   case PPC::DIR_PWR10:
14934   case PPC::DIR_PWR_FUTURE: {
14935     if (!ML)
14936       break;
14937 
14938     if (!DisableInnermostLoopAlign32) {
14939       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
14940       // so that we can decrease cache misses and branch-prediction misses.
14941       // Actual alignment of the loop will depend on the hotness check and other
14942       // logic in alignBlocks.
14943       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14944         return Align(32);
14945     }
14946 
14947     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14948 
14949     // For small loops (between 5 and 8 instructions), align to a 32-byte
14950     // boundary so that the entire loop fits in one instruction-cache line.
14951     uint64_t LoopSize = 0;
14952     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14953       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14954         LoopSize += TII->getInstSizeInBytes(*J);
14955         if (LoopSize > 32)
14956           break;
14957       }
14958 
14959     if (LoopSize > 16 && LoopSize <= 32)
14960       return Align(32);
14961 
14962     break;
14963   }
14964   }
14965 
14966   return TargetLowering::getPrefLoopAlignment(ML);
14967 }
14968 
14969 /// getConstraintType - Given a constraint, return the type of
14970 /// constraint it is for this target.
14971 PPCTargetLowering::ConstraintType
14972 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
14973   if (Constraint.size() == 1) {
14974     switch (Constraint[0]) {
14975     default: break;
14976     case 'b':
14977     case 'r':
14978     case 'f':
14979     case 'd':
14980     case 'v':
14981     case 'y':
14982       return C_RegisterClass;
14983     case 'Z':
14984       // FIXME: While Z does indicate a memory constraint, it specifically
14985       // indicates an r+r address (used in conjunction with the 'y' modifier
14986       // in the replacement string). Currently, we're forcing the base
14987       // register to be r0 in the asm printer (which is interpreted as zero)
14988       // and forming the complete address in the second register. This is
14989       // suboptimal.
14990       return C_Memory;
14991     }
14992   } else if (Constraint == "wc") { // individual CR bits.
14993     return C_RegisterClass;
14994   } else if (Constraint == "wa" || Constraint == "wd" ||
14995              Constraint == "wf" || Constraint == "ws" ||
14996              Constraint == "wi" || Constraint == "ww") {
14997     return C_RegisterClass; // VSX registers.
14998   }
14999   return TargetLowering::getConstraintType(Constraint);
15000 }
15001 
15002 /// Examine constraint type and operand type and determine a weight value.
15003 /// This object must already have been set up with the operand type
15004 /// and the current alternative constraint selected.
15005 TargetLowering::ConstraintWeight
15006 PPCTargetLowering::getSingleConstraintMatchWeight(
15007     AsmOperandInfo &info, const char *constraint) const {
15008   ConstraintWeight weight = CW_Invalid;
15009   Value *CallOperandVal = info.CallOperandVal;
15010     // If we don't have a value, we can't do a match,
15011     // but allow it at the lowest weight.
15012   if (!CallOperandVal)
15013     return CW_Default;
15014   Type *type = CallOperandVal->getType();
15015 
15016   // Look at the constraint type.
15017   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15018     return CW_Register; // an individual CR bit.
15019   else if ((StringRef(constraint) == "wa" ||
15020             StringRef(constraint) == "wd" ||
15021             StringRef(constraint) == "wf") &&
15022            type->isVectorTy())
15023     return CW_Register;
15024   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15025     return CW_Register; // just hold 64-bit integers data.
15026   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15027     return CW_Register;
15028   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15029     return CW_Register;
15030 
15031   switch (*constraint) {
15032   default:
15033     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15034     break;
15035   case 'b':
15036     if (type->isIntegerTy())
15037       weight = CW_Register;
15038     break;
15039   case 'f':
15040     if (type->isFloatTy())
15041       weight = CW_Register;
15042     break;
15043   case 'd':
15044     if (type->isDoubleTy())
15045       weight = CW_Register;
15046     break;
15047   case 'v':
15048     if (type->isVectorTy())
15049       weight = CW_Register;
15050     break;
15051   case 'y':
15052     weight = CW_Register;
15053     break;
15054   case 'Z':
15055     weight = CW_Memory;
15056     break;
15057   }
15058   return weight;
15059 }
15060 
15061 std::pair<unsigned, const TargetRegisterClass *>
15062 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15063                                                 StringRef Constraint,
15064                                                 MVT VT) const {
15065   if (Constraint.size() == 1) {
15066     // GCC RS6000 Constraint Letters
15067     switch (Constraint[0]) {
15068     case 'b':   // R1-R31
15069       if (VT == MVT::i64 && Subtarget.isPPC64())
15070         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15071       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15072     case 'r':   // R0-R31
15073       if (VT == MVT::i64 && Subtarget.isPPC64())
15074         return std::make_pair(0U, &PPC::G8RCRegClass);
15075       return std::make_pair(0U, &PPC::GPRCRegClass);
15076     // 'd' and 'f' constraints are both defined to be "the floating point
15077     // registers", where one is for 32-bit and the other for 64-bit. We don't
15078     // really care overly much here so just give them all the same reg classes.
15079     case 'd':
15080     case 'f':
15081       if (Subtarget.hasSPE()) {
15082         if (VT == MVT::f32 || VT == MVT::i32)
15083           return std::make_pair(0U, &PPC::GPRCRegClass);
15084         if (VT == MVT::f64 || VT == MVT::i64)
15085           return std::make_pair(0U, &PPC::SPERCRegClass);
15086       } else {
15087         if (VT == MVT::f32 || VT == MVT::i32)
15088           return std::make_pair(0U, &PPC::F4RCRegClass);
15089         if (VT == MVT::f64 || VT == MVT::i64)
15090           return std::make_pair(0U, &PPC::F8RCRegClass);
15091       }
15092       break;
15093     case 'v':
15094       if (Subtarget.hasAltivec())
15095         return std::make_pair(0U, &PPC::VRRCRegClass);
15096       break;
15097     case 'y':   // crrc
15098       return std::make_pair(0U, &PPC::CRRCRegClass);
15099     }
15100   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15101     // An individual CR bit.
15102     return std::make_pair(0U, &PPC::CRBITRCRegClass);
15103   } else if ((Constraint == "wa" || Constraint == "wd" ||
15104              Constraint == "wf" || Constraint == "wi") &&
15105              Subtarget.hasVSX()) {
15106     return std::make_pair(0U, &PPC::VSRCRegClass);
15107   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15108     if (VT == MVT::f32 && Subtarget.hasP8Vector())
15109       return std::make_pair(0U, &PPC::VSSRCRegClass);
15110     else
15111       return std::make_pair(0U, &PPC::VSFRCRegClass);
15112   }
15113 
15114   // Handle special cases of physical registers that are not properly handled
15115   // by the base class.
15116   if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
15117     // If we name a VSX register, we can't defer to the base class because it
15118     // will not recognize the correct register (their names will be VSL{0-31}
15119     // and V{0-31} so they won't match). So we match them here.
15120     if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15121       int VSNum = atoi(Constraint.data() + 3);
15122       assert(VSNum >= 0 && VSNum <= 63 &&
15123              "Attempted to access a vsr out of range");
15124       if (VSNum < 32)
15125         return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15126       return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15127     }
15128 
15129     // For float registers, we can't defer to the base class as it will match
15130     // the SPILLTOVSRRC class.
15131     if (Constraint.size() > 3 && Constraint[1] == 'f') {
15132       int RegNum = atoi(Constraint.data() + 2);
15133       if (RegNum > 31 || RegNum < 0)
15134         report_fatal_error("Invalid floating point register number");
15135       if (VT == MVT::f32 || VT == MVT::i32)
15136         return Subtarget.hasSPE()
15137                    ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
15138                    : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
15139       if (VT == MVT::f64 || VT == MVT::i64)
15140         return Subtarget.hasSPE()
15141                    ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
15142                    : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
15143     }
15144   }
15145 
15146   std::pair<unsigned, const TargetRegisterClass *> R =
15147       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15148 
15149   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15150   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15151   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15152   // register.
15153   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15154   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15155   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15156       PPC::GPRCRegClass.contains(R.first))
15157     return std::make_pair(TRI->getMatchingSuperReg(R.first,
15158                             PPC::sub_32, &PPC::G8RCRegClass),
15159                           &PPC::G8RCRegClass);
15160 
15161   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15162   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15163     R.first = PPC::CR0;
15164     R.second = &PPC::CRRCRegClass;
15165   }
15166 
15167   return R;
15168 }
15169 
15170 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15171 /// vector.  If it is invalid, don't add anything to Ops.
15172 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15173                                                      std::string &Constraint,
15174                                                      std::vector<SDValue>&Ops,
15175                                                      SelectionDAG &DAG) const {
15176   SDValue Result;
15177 
15178   // Only support length 1 constraints.
15179   if (Constraint.length() > 1) return;
15180 
15181   char Letter = Constraint[0];
15182   switch (Letter) {
15183   default: break;
15184   case 'I':
15185   case 'J':
15186   case 'K':
15187   case 'L':
15188   case 'M':
15189   case 'N':
15190   case 'O':
15191   case 'P': {
15192     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15193     if (!CST) return; // Must be an immediate to match.
15194     SDLoc dl(Op);
15195     int64_t Value = CST->getSExtValue();
15196     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15197                          // numbers are printed as such.
15198     switch (Letter) {
15199     default: llvm_unreachable("Unknown constraint letter!");
15200     case 'I':  // "I" is a signed 16-bit constant.
15201       if (isInt<16>(Value))
15202         Result = DAG.getTargetConstant(Value, dl, TCVT);
15203       break;
15204     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15205       if (isShiftedUInt<16, 16>(Value))
15206         Result = DAG.getTargetConstant(Value, dl, TCVT);
15207       break;
15208     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15209       if (isShiftedInt<16, 16>(Value))
15210         Result = DAG.getTargetConstant(Value, dl, TCVT);
15211       break;
15212     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15213       if (isUInt<16>(Value))
15214         Result = DAG.getTargetConstant(Value, dl, TCVT);
15215       break;
15216     case 'M':  // "M" is a constant that is greater than 31.
15217       if (Value > 31)
15218         Result = DAG.getTargetConstant(Value, dl, TCVT);
15219       break;
15220     case 'N':  // "N" is a positive constant that is an exact power of two.
15221       if (Value > 0 && isPowerOf2_64(Value))
15222         Result = DAG.getTargetConstant(Value, dl, TCVT);
15223       break;
15224     case 'O':  // "O" is the constant zero.
15225       if (Value == 0)
15226         Result = DAG.getTargetConstant(Value, dl, TCVT);
15227       break;
15228     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15229       if (isInt<16>(-Value))
15230         Result = DAG.getTargetConstant(Value, dl, TCVT);
15231       break;
15232     }
15233     break;
15234   }
15235   }
15236 
15237   if (Result.getNode()) {
15238     Ops.push_back(Result);
15239     return;
15240   }
15241 
15242   // Handle standard constraint letters.
15243   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15244 }
15245 
15246 // isLegalAddressingMode - Return true if the addressing mode represented
15247 // by AM is legal for this target, for a load/store of the specified type.
15248 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15249                                               const AddrMode &AM, Type *Ty,
15250                                               unsigned AS,
15251                                               Instruction *I) const {
15252   // Vector type r+i form is supported since power9 as DQ form. We don't check
15253   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15254   // imm form is preferred and the offset can be adjusted to use imm form later
15255   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15256   // max offset to check legal addressing mode, we should be a little aggressive
15257   // to contain other offsets for that LSRUse.
15258   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15259     return false;
15260 
15261   // PPC allows a sign-extended 16-bit immediate field.
15262   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15263     return false;
15264 
15265   // No global is ever allowed as a base.
15266   if (AM.BaseGV)
15267     return false;
15268 
15269   // PPC only support r+r,
15270   switch (AM.Scale) {
15271   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15272     break;
15273   case 1:
15274     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15275       return false;
15276     // Otherwise we have r+r or r+i.
15277     break;
15278   case 2:
15279     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15280       return false;
15281     // Allow 2*r as r+r.
15282     break;
15283   default:
15284     // No other scales are supported.
15285     return false;
15286   }
15287 
15288   return true;
15289 }
15290 
15291 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15292                                            SelectionDAG &DAG) const {
15293   MachineFunction &MF = DAG.getMachineFunction();
15294   MachineFrameInfo &MFI = MF.getFrameInfo();
15295   MFI.setReturnAddressIsTaken(true);
15296 
15297   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15298     return SDValue();
15299 
15300   SDLoc dl(Op);
15301   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15302 
15303   // Make sure the function does not optimize away the store of the RA to
15304   // the stack.
15305   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15306   FuncInfo->setLRStoreRequired();
15307   bool isPPC64 = Subtarget.isPPC64();
15308   auto PtrVT = getPointerTy(MF.getDataLayout());
15309 
15310   if (Depth > 0) {
15311     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15312     SDValue Offset =
15313         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15314                         isPPC64 ? MVT::i64 : MVT::i32);
15315     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15316                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15317                        MachinePointerInfo());
15318   }
15319 
15320   // Just load the return address off the stack.
15321   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15322   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15323                      MachinePointerInfo());
15324 }
15325 
15326 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15327                                           SelectionDAG &DAG) const {
15328   SDLoc dl(Op);
15329   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15330 
15331   MachineFunction &MF = DAG.getMachineFunction();
15332   MachineFrameInfo &MFI = MF.getFrameInfo();
15333   MFI.setFrameAddressIsTaken(true);
15334 
15335   EVT PtrVT = getPointerTy(MF.getDataLayout());
15336   bool isPPC64 = PtrVT == MVT::i64;
15337 
15338   // Naked functions never have a frame pointer, and so we use r1. For all
15339   // other functions, this decision must be delayed until during PEI.
15340   unsigned FrameReg;
15341   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15342     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15343   else
15344     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15345 
15346   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15347                                          PtrVT);
15348   while (Depth--)
15349     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15350                             FrameAddr, MachinePointerInfo());
15351   return FrameAddr;
15352 }
15353 
15354 // FIXME? Maybe this could be a TableGen attribute on some registers and
15355 // this table could be generated automatically from RegInfo.
15356 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15357                                               const MachineFunction &MF) const {
15358   bool isPPC64 = Subtarget.isPPC64();
15359 
15360   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15361   if (!is64Bit && VT != LLT::scalar(32))
15362     report_fatal_error("Invalid register global variable type");
15363 
15364   Register Reg = StringSwitch<Register>(RegName)
15365                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15366                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15367                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15368                      .Default(Register());
15369 
15370   if (Reg)
15371     return Reg;
15372   report_fatal_error("Invalid register name global variable");
15373 }
15374 
15375 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15376   // 32-bit SVR4 ABI access everything as got-indirect.
15377   if (Subtarget.is32BitELFABI())
15378     return true;
15379 
15380   // AIX accesses everything indirectly through the TOC, which is similar to
15381   // the GOT.
15382   if (Subtarget.isAIXABI())
15383     return true;
15384 
15385   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15386   // If it is small or large code model, module locals are accessed
15387   // indirectly by loading their address from .toc/.got.
15388   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15389     return true;
15390 
15391   // JumpTable and BlockAddress are accessed as got-indirect.
15392   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15393     return true;
15394 
15395   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15396     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15397 
15398   return false;
15399 }
15400 
15401 bool
15402 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15403   // The PowerPC target isn't yet aware of offsets.
15404   return false;
15405 }
15406 
15407 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15408                                            const CallInst &I,
15409                                            MachineFunction &MF,
15410                                            unsigned Intrinsic) const {
15411   switch (Intrinsic) {
15412   case Intrinsic::ppc_altivec_lvx:
15413   case Intrinsic::ppc_altivec_lvxl:
15414   case Intrinsic::ppc_altivec_lvebx:
15415   case Intrinsic::ppc_altivec_lvehx:
15416   case Intrinsic::ppc_altivec_lvewx:
15417   case Intrinsic::ppc_vsx_lxvd2x:
15418   case Intrinsic::ppc_vsx_lxvw4x:
15419   case Intrinsic::ppc_vsx_lxvd2x_be:
15420   case Intrinsic::ppc_vsx_lxvw4x_be:
15421   case Intrinsic::ppc_vsx_lxvl:
15422   case Intrinsic::ppc_vsx_lxvll: {
15423     EVT VT;
15424     switch (Intrinsic) {
15425     case Intrinsic::ppc_altivec_lvebx:
15426       VT = MVT::i8;
15427       break;
15428     case Intrinsic::ppc_altivec_lvehx:
15429       VT = MVT::i16;
15430       break;
15431     case Intrinsic::ppc_altivec_lvewx:
15432       VT = MVT::i32;
15433       break;
15434     case Intrinsic::ppc_vsx_lxvd2x:
15435     case Intrinsic::ppc_vsx_lxvd2x_be:
15436       VT = MVT::v2f64;
15437       break;
15438     default:
15439       VT = MVT::v4i32;
15440       break;
15441     }
15442 
15443     Info.opc = ISD::INTRINSIC_W_CHAIN;
15444     Info.memVT = VT;
15445     Info.ptrVal = I.getArgOperand(0);
15446     Info.offset = -VT.getStoreSize()+1;
15447     Info.size = 2*VT.getStoreSize()-1;
15448     Info.align = Align(1);
15449     Info.flags = MachineMemOperand::MOLoad;
15450     return true;
15451   }
15452   case Intrinsic::ppc_altivec_stvx:
15453   case Intrinsic::ppc_altivec_stvxl:
15454   case Intrinsic::ppc_altivec_stvebx:
15455   case Intrinsic::ppc_altivec_stvehx:
15456   case Intrinsic::ppc_altivec_stvewx:
15457   case Intrinsic::ppc_vsx_stxvd2x:
15458   case Intrinsic::ppc_vsx_stxvw4x:
15459   case Intrinsic::ppc_vsx_stxvd2x_be:
15460   case Intrinsic::ppc_vsx_stxvw4x_be:
15461   case Intrinsic::ppc_vsx_stxvl:
15462   case Intrinsic::ppc_vsx_stxvll: {
15463     EVT VT;
15464     switch (Intrinsic) {
15465     case Intrinsic::ppc_altivec_stvebx:
15466       VT = MVT::i8;
15467       break;
15468     case Intrinsic::ppc_altivec_stvehx:
15469       VT = MVT::i16;
15470       break;
15471     case Intrinsic::ppc_altivec_stvewx:
15472       VT = MVT::i32;
15473       break;
15474     case Intrinsic::ppc_vsx_stxvd2x:
15475     case Intrinsic::ppc_vsx_stxvd2x_be:
15476       VT = MVT::v2f64;
15477       break;
15478     default:
15479       VT = MVT::v4i32;
15480       break;
15481     }
15482 
15483     Info.opc = ISD::INTRINSIC_VOID;
15484     Info.memVT = VT;
15485     Info.ptrVal = I.getArgOperand(1);
15486     Info.offset = -VT.getStoreSize()+1;
15487     Info.size = 2*VT.getStoreSize()-1;
15488     Info.align = Align(1);
15489     Info.flags = MachineMemOperand::MOStore;
15490     return true;
15491   }
15492   default:
15493     break;
15494   }
15495 
15496   return false;
15497 }
15498 
15499 /// It returns EVT::Other if the type should be determined using generic
15500 /// target-independent logic.
15501 EVT PPCTargetLowering::getOptimalMemOpType(
15502     const MemOp &Op, const AttributeList &FuncAttributes) const {
15503   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15504     // We should use Altivec/VSX loads and stores when available. For unaligned
15505     // addresses, unaligned VSX loads are only fast starting with the P8.
15506     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15507         (Op.isAligned(Align(16)) ||
15508          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15509       return MVT::v4i32;
15510   }
15511 
15512   if (Subtarget.isPPC64()) {
15513     return MVT::i64;
15514   }
15515 
15516   return MVT::i32;
15517 }
15518 
15519 /// Returns true if it is beneficial to convert a load of a constant
15520 /// to just the constant itself.
15521 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15522                                                           Type *Ty) const {
15523   assert(Ty->isIntegerTy());
15524 
15525   unsigned BitSize = Ty->getPrimitiveSizeInBits();
15526   return !(BitSize == 0 || BitSize > 64);
15527 }
15528 
15529 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15530   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15531     return false;
15532   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15533   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15534   return NumBits1 == 64 && NumBits2 == 32;
15535 }
15536 
15537 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15538   if (!VT1.isInteger() || !VT2.isInteger())
15539     return false;
15540   unsigned NumBits1 = VT1.getSizeInBits();
15541   unsigned NumBits2 = VT2.getSizeInBits();
15542   return NumBits1 == 64 && NumBits2 == 32;
15543 }
15544 
15545 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
15546   // Generally speaking, zexts are not free, but they are free when they can be
15547   // folded with other operations.
15548   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15549     EVT MemVT = LD->getMemoryVT();
15550     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15551          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15552         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15553          LD->getExtensionType() == ISD::ZEXTLOAD))
15554       return true;
15555   }
15556 
15557   // FIXME: Add other cases...
15558   //  - 32-bit shifts with a zext to i64
15559   //  - zext after ctlz, bswap, etc.
15560   //  - zext after and by a constant mask
15561 
15562   return TargetLowering::isZExtFree(Val, VT2);
15563 }
15564 
15565 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15566   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15567          "invalid fpext types");
15568   // Extending to float128 is not free.
15569   if (DestVT == MVT::f128)
15570     return false;
15571   return true;
15572 }
15573 
15574 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15575   return isInt<16>(Imm) || isUInt<16>(Imm);
15576 }
15577 
15578 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15579   return isInt<16>(Imm) || isUInt<16>(Imm);
15580 }
15581 
15582 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
15583                                                        MachineMemOperand::Flags,
15584                                                        bool *Fast) const {
15585   if (DisablePPCUnaligned)
15586     return false;
15587 
15588   // PowerPC supports unaligned memory access for simple non-vector types.
15589   // Although accessing unaligned addresses is not as efficient as accessing
15590   // aligned addresses, it is generally more efficient than manual expansion,
15591   // and generally only traps for software emulation when crossing page
15592   // boundaries.
15593 
15594   if (!VT.isSimple())
15595     return false;
15596 
15597   if (VT.isFloatingPoint() && !VT.isVector() &&
15598       !Subtarget.allowsUnalignedFPAccess())
15599     return false;
15600 
15601   if (VT.getSimpleVT().isVector()) {
15602     if (Subtarget.hasVSX()) {
15603       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15604           VT != MVT::v4f32 && VT != MVT::v4i32)
15605         return false;
15606     } else {
15607       return false;
15608     }
15609   }
15610 
15611   if (VT == MVT::ppcf128)
15612     return false;
15613 
15614   if (Fast)
15615     *Fast = true;
15616 
15617   return true;
15618 }
15619 
15620 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
15621                                                SDValue C) const {
15622   // Check integral scalar types.
15623   if (!VT.isScalarInteger())
15624     return false;
15625   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
15626     if (!ConstNode->getAPIntValue().isSignedIntN(64))
15627       return false;
15628     // This transformation will generate >= 2 operations. But the following
15629     // cases will generate <= 2 instructions during ISEL. So exclude them.
15630     // 1. If the constant multiplier fits 16 bits, it can be handled by one
15631     // HW instruction, ie. MULLI
15632     // 2. If the multiplier after shifted fits 16 bits, an extra shift
15633     // instruction is needed than case 1, ie. MULLI and RLDICR
15634     int64_t Imm = ConstNode->getSExtValue();
15635     unsigned Shift = countTrailingZeros<uint64_t>(Imm);
15636     Imm >>= Shift;
15637     if (isInt<16>(Imm))
15638       return false;
15639     uint64_t UImm = static_cast<uint64_t>(Imm);
15640     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
15641         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
15642       return true;
15643   }
15644   return false;
15645 }
15646 
15647 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15648                                                    EVT VT) const {
15649   return isFMAFasterThanFMulAndFAdd(
15650       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15651 }
15652 
15653 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
15654                                                    Type *Ty) const {
15655   switch (Ty->getScalarType()->getTypeID()) {
15656   case Type::FloatTyID:
15657   case Type::DoubleTyID:
15658     return true;
15659   case Type::FP128TyID:
15660     return Subtarget.hasP9Vector();
15661   default:
15662     return false;
15663   }
15664 }
15665 
15666 // FIXME: add more patterns which are not profitable to hoist.
15667 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
15668   if (!I->hasOneUse())
15669     return true;
15670 
15671   Instruction *User = I->user_back();
15672   assert(User && "A single use instruction with no uses.");
15673 
15674   switch (I->getOpcode()) {
15675   case Instruction::FMul: {
15676     // Don't break FMA, PowerPC prefers FMA.
15677     if (User->getOpcode() != Instruction::FSub &&
15678         User->getOpcode() != Instruction::FAdd)
15679       return true;
15680 
15681     const TargetOptions &Options = getTargetMachine().Options;
15682     const Function *F = I->getFunction();
15683     const DataLayout &DL = F->getParent()->getDataLayout();
15684     Type *Ty = User->getOperand(0)->getType();
15685 
15686     return !(
15687         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
15688         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
15689         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
15690   }
15691   case Instruction::Load: {
15692     // Don't break "store (load float*)" pattern, this pattern will be combined
15693     // to "store (load int32)" in later InstCombine pass. See function
15694     // combineLoadToOperationType. On PowerPC, loading a float point takes more
15695     // cycles than loading a 32 bit integer.
15696     LoadInst *LI = cast<LoadInst>(I);
15697     // For the loads that combineLoadToOperationType does nothing, like
15698     // ordered load, it should be profitable to hoist them.
15699     // For swifterror load, it can only be used for pointer to pointer type, so
15700     // later type check should get rid of this case.
15701     if (!LI->isUnordered())
15702       return true;
15703 
15704     if (User->getOpcode() != Instruction::Store)
15705       return true;
15706 
15707     if (I->getType()->getTypeID() != Type::FloatTyID)
15708       return true;
15709 
15710     return false;
15711   }
15712   default:
15713     return true;
15714   }
15715   return true;
15716 }
15717 
15718 const MCPhysReg *
15719 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
15720   // LR is a callee-save register, but we must treat it as clobbered by any call
15721   // site. Hence we include LR in the scratch registers, which are in turn added
15722   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15723   // to CTR, which is used by any indirect call.
15724   static const MCPhysReg ScratchRegs[] = {
15725     PPC::X12, PPC::LR8, PPC::CTR8, 0
15726   };
15727 
15728   return ScratchRegs;
15729 }
15730 
15731 Register PPCTargetLowering::getExceptionPointerRegister(
15732     const Constant *PersonalityFn) const {
15733   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15734 }
15735 
15736 Register PPCTargetLowering::getExceptionSelectorRegister(
15737     const Constant *PersonalityFn) const {
15738   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15739 }
15740 
15741 bool
15742 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
15743                      EVT VT , unsigned DefinedValues) const {
15744   if (VT == MVT::v2i64)
15745     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15746 
15747   if (Subtarget.hasVSX())
15748     return true;
15749 
15750   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
15751 }
15752 
15753 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
15754   if (DisableILPPref || Subtarget.enableMachineScheduler())
15755     return TargetLowering::getSchedulingPreference(N);
15756 
15757   return Sched::ILP;
15758 }
15759 
15760 // Create a fast isel object.
15761 FastISel *
15762 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
15763                                   const TargetLibraryInfo *LibInfo) const {
15764   return PPC::createFastISel(FuncInfo, LibInfo);
15765 }
15766 
15767 // 'Inverted' means the FMA opcode after negating one multiplicand.
15768 // For example, (fma -a b c) = (fnmsub a b c)
15769 static unsigned invertFMAOpcode(unsigned Opc) {
15770   switch (Opc) {
15771   default:
15772     llvm_unreachable("Invalid FMA opcode for PowerPC!");
15773   case ISD::FMA:
15774     return PPCISD::FNMSUB;
15775   case PPCISD::FNMSUB:
15776     return ISD::FMA;
15777   }
15778 }
15779 
15780 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
15781                                                 bool LegalOps, bool OptForSize,
15782                                                 NegatibleCost &Cost,
15783                                                 unsigned Depth) const {
15784   if (Depth > SelectionDAG::MaxRecursionDepth)
15785     return SDValue();
15786 
15787   unsigned Opc = Op.getOpcode();
15788   EVT VT = Op.getValueType();
15789   SDNodeFlags Flags = Op.getNode()->getFlags();
15790 
15791   switch (Opc) {
15792   case PPCISD::FNMSUB:
15793     if (!Op.hasOneUse() || !isTypeLegal(VT))
15794       break;
15795 
15796     const TargetOptions &Options = getTargetMachine().Options;
15797     SDValue N0 = Op.getOperand(0);
15798     SDValue N1 = Op.getOperand(1);
15799     SDValue N2 = Op.getOperand(2);
15800     SDLoc Loc(Op);
15801 
15802     NegatibleCost N2Cost = NegatibleCost::Expensive;
15803     SDValue NegN2 =
15804         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
15805 
15806     if (!NegN2)
15807       return SDValue();
15808 
15809     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
15810     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
15811     // These transformations may change sign of zeroes. For example,
15812     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
15813     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
15814       // Try and choose the cheaper one to negate.
15815       NegatibleCost N0Cost = NegatibleCost::Expensive;
15816       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
15817                                            N0Cost, Depth + 1);
15818 
15819       NegatibleCost N1Cost = NegatibleCost::Expensive;
15820       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
15821                                            N1Cost, Depth + 1);
15822 
15823       if (NegN0 && N0Cost <= N1Cost) {
15824         Cost = std::min(N0Cost, N2Cost);
15825         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
15826       } else if (NegN1) {
15827         Cost = std::min(N1Cost, N2Cost);
15828         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
15829       }
15830     }
15831 
15832     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
15833     if (isOperationLegal(ISD::FMA, VT)) {
15834       Cost = N2Cost;
15835       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
15836     }
15837 
15838     break;
15839   }
15840 
15841   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
15842                                               Cost, Depth);
15843 }
15844 
15845 // Override to enable LOAD_STACK_GUARD lowering on Linux.
15846 bool PPCTargetLowering::useLoadStackGuardNode() const {
15847   if (!Subtarget.isTargetLinux())
15848     return TargetLowering::useLoadStackGuardNode();
15849   return true;
15850 }
15851 
15852 // Override to disable global variable loading on Linux.
15853 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
15854   if (!Subtarget.isTargetLinux())
15855     return TargetLowering::insertSSPDeclarations(M);
15856 }
15857 
15858 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
15859                                      bool ForCodeSize) const {
15860   if (!VT.isSimple() || !Subtarget.hasVSX())
15861     return false;
15862 
15863   switch(VT.getSimpleVT().SimpleTy) {
15864   default:
15865     // For FP types that are currently not supported by PPC backend, return
15866     // false. Examples: f16, f80.
15867     return false;
15868   case MVT::f32:
15869   case MVT::f64:
15870     if (Subtarget.hasPrefixInstrs()) {
15871       // With prefixed instructions, we can materialize anything that can be
15872       // represented with a 32-bit immediate, not just positive zero.
15873       APFloat APFloatOfImm = Imm;
15874       return convertToNonDenormSingle(APFloatOfImm);
15875     }
15876     LLVM_FALLTHROUGH;
15877   case MVT::ppcf128:
15878     return Imm.isPosZero();
15879   }
15880 }
15881 
15882 // For vector shift operation op, fold
15883 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
15884 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
15885                                   SelectionDAG &DAG) {
15886   SDValue N0 = N->getOperand(0);
15887   SDValue N1 = N->getOperand(1);
15888   EVT VT = N0.getValueType();
15889   unsigned OpSizeInBits = VT.getScalarSizeInBits();
15890   unsigned Opcode = N->getOpcode();
15891   unsigned TargetOpcode;
15892 
15893   switch (Opcode) {
15894   default:
15895     llvm_unreachable("Unexpected shift operation");
15896   case ISD::SHL:
15897     TargetOpcode = PPCISD::SHL;
15898     break;
15899   case ISD::SRL:
15900     TargetOpcode = PPCISD::SRL;
15901     break;
15902   case ISD::SRA:
15903     TargetOpcode = PPCISD::SRA;
15904     break;
15905   }
15906 
15907   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15908       N1->getOpcode() == ISD::AND)
15909     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15910       if (Mask->getZExtValue() == OpSizeInBits - 1)
15911         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15912 
15913   return SDValue();
15914 }
15915 
15916 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15917   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15918     return Value;
15919 
15920   SDValue N0 = N->getOperand(0);
15921   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15922   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
15923       N0.getOpcode() != ISD::SIGN_EXTEND ||
15924       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
15925       N->getValueType(0) != MVT::i64)
15926     return SDValue();
15927 
15928   // We can't save an operation here if the value is already extended, and
15929   // the existing shift is easier to combine.
15930   SDValue ExtsSrc = N0.getOperand(0);
15931   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15932       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15933     return SDValue();
15934 
15935   SDLoc DL(N0);
15936   SDValue ShiftBy = SDValue(CN1, 0);
15937   // We want the shift amount to be i32 on the extswli, but the shift could
15938   // have an i64.
15939   if (ShiftBy.getValueType() == MVT::i64)
15940     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15941 
15942   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15943                          ShiftBy);
15944 }
15945 
15946 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15947   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15948     return Value;
15949 
15950   return SDValue();
15951 }
15952 
15953 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15954   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15955     return Value;
15956 
15957   return SDValue();
15958 }
15959 
15960 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
15961 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
15962 // When C is zero, the equation (addi Z, -C) can be simplified to Z
15963 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
15964 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
15965                                  const PPCSubtarget &Subtarget) {
15966   if (!Subtarget.isPPC64())
15967     return SDValue();
15968 
15969   SDValue LHS = N->getOperand(0);
15970   SDValue RHS = N->getOperand(1);
15971 
15972   auto isZextOfCompareWithConstant = [](SDValue Op) {
15973     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
15974         Op.getValueType() != MVT::i64)
15975       return false;
15976 
15977     SDValue Cmp = Op.getOperand(0);
15978     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
15979         Cmp.getOperand(0).getValueType() != MVT::i64)
15980       return false;
15981 
15982     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
15983       int64_t NegConstant = 0 - Constant->getSExtValue();
15984       // Due to the limitations of the addi instruction,
15985       // -C is required to be [-32768, 32767].
15986       return isInt<16>(NegConstant);
15987     }
15988 
15989     return false;
15990   };
15991 
15992   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
15993   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
15994 
15995   // If there is a pattern, canonicalize a zext operand to the RHS.
15996   if (LHSHasPattern && !RHSHasPattern)
15997     std::swap(LHS, RHS);
15998   else if (!LHSHasPattern && !RHSHasPattern)
15999     return SDValue();
16000 
16001   SDLoc DL(N);
16002   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16003   SDValue Cmp = RHS.getOperand(0);
16004   SDValue Z = Cmp.getOperand(0);
16005   auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16006 
16007   assert(Constant && "Constant Should not be a null pointer.");
16008   int64_t NegConstant = 0 - Constant->getSExtValue();
16009 
16010   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16011   default: break;
16012   case ISD::SETNE: {
16013     //                                 when C == 0
16014     //                             --> addze X, (addic Z, -1).carry
16015     //                            /
16016     // add X, (zext(setne Z, C))--
16017     //                            \    when -32768 <= -C <= 32767 && C != 0
16018     //                             --> addze X, (addic (addi Z, -C), -1).carry
16019     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16020                               DAG.getConstant(NegConstant, DL, MVT::i64));
16021     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16022     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16023                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16024     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16025                        SDValue(Addc.getNode(), 1));
16026     }
16027   case ISD::SETEQ: {
16028     //                                 when C == 0
16029     //                             --> addze X, (subfic Z, 0).carry
16030     //                            /
16031     // add X, (zext(sete  Z, C))--
16032     //                            \    when -32768 <= -C <= 32767 && C != 0
16033     //                             --> addze X, (subfic (addi Z, -C), 0).carry
16034     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16035                               DAG.getConstant(NegConstant, DL, MVT::i64));
16036     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16037     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16038                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16039     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16040                        SDValue(Subc.getNode(), 1));
16041     }
16042   }
16043 
16044   return SDValue();
16045 }
16046 
16047 // Transform
16048 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16049 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16050 // In this case both C1 and C2 must be known constants.
16051 // C1+C2 must fit into a 34 bit signed integer.
16052 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16053                                           const PPCSubtarget &Subtarget) {
16054   if (!Subtarget.isUsingPCRelativeCalls())
16055     return SDValue();
16056 
16057   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16058   // If we find that node try to cast the Global Address and the Constant.
16059   SDValue LHS = N->getOperand(0);
16060   SDValue RHS = N->getOperand(1);
16061 
16062   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16063     std::swap(LHS, RHS);
16064 
16065   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16066     return SDValue();
16067 
16068   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16069   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16070   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16071 
16072   // Check that both casts succeeded.
16073   if (!GSDN || !ConstNode)
16074     return SDValue();
16075 
16076   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16077   SDLoc DL(GSDN);
16078 
16079   // The signed int offset needs to fit in 34 bits.
16080   if (!isInt<34>(NewOffset))
16081     return SDValue();
16082 
16083   // The new global address is a copy of the old global address except
16084   // that it has the updated Offset.
16085   SDValue GA =
16086       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16087                                  NewOffset, GSDN->getTargetFlags());
16088   SDValue MatPCRel =
16089       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16090   return MatPCRel;
16091 }
16092 
16093 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16094   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16095     return Value;
16096 
16097   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16098     return Value;
16099 
16100   return SDValue();
16101 }
16102 
16103 // Detect TRUNCATE operations on bitcasts of float128 values.
16104 // What we are looking for here is the situtation where we extract a subset
16105 // of bits from a 128 bit float.
16106 // This can be of two forms:
16107 // 1) BITCAST of f128 feeding TRUNCATE
16108 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16109 // The reason this is required is because we do not have a legal i128 type
16110 // and so we want to prevent having to store the f128 and then reload part
16111 // of it.
16112 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16113                                            DAGCombinerInfo &DCI) const {
16114   // If we are using CRBits then try that first.
16115   if (Subtarget.useCRBits()) {
16116     // Check if CRBits did anything and return that if it did.
16117     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16118       return CRTruncValue;
16119   }
16120 
16121   SDLoc dl(N);
16122   SDValue Op0 = N->getOperand(0);
16123 
16124   // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16125   if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16126     EVT VT = N->getValueType(0);
16127     if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16128       return SDValue();
16129     SDValue Sub = Op0.getOperand(0);
16130     if (Sub.getOpcode() == ISD::SUB) {
16131       SDValue SubOp0 = Sub.getOperand(0);
16132       SDValue SubOp1 = Sub.getOperand(1);
16133       if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16134           (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16135         return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16136                                SubOp1.getOperand(0),
16137                                DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16138       }
16139     }
16140   }
16141 
16142   // Looking for a truncate of i128 to i64.
16143   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16144     return SDValue();
16145 
16146   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16147 
16148   // SRL feeding TRUNCATE.
16149   if (Op0.getOpcode() == ISD::SRL) {
16150     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16151     // The right shift has to be by 64 bits.
16152     if (!ConstNode || ConstNode->getZExtValue() != 64)
16153       return SDValue();
16154 
16155     // Switch the element number to extract.
16156     EltToExtract = EltToExtract ? 0 : 1;
16157     // Update Op0 past the SRL.
16158     Op0 = Op0.getOperand(0);
16159   }
16160 
16161   // BITCAST feeding a TRUNCATE possibly via SRL.
16162   if (Op0.getOpcode() == ISD::BITCAST &&
16163       Op0.getValueType() == MVT::i128 &&
16164       Op0.getOperand(0).getValueType() == MVT::f128) {
16165     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16166     return DCI.DAG.getNode(
16167         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16168         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16169   }
16170   return SDValue();
16171 }
16172 
16173 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16174   SelectionDAG &DAG = DCI.DAG;
16175 
16176   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16177   if (!ConstOpOrElement)
16178     return SDValue();
16179 
16180   // An imul is usually smaller than the alternative sequence for legal type.
16181   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16182       isOperationLegal(ISD::MUL, N->getValueType(0)))
16183     return SDValue();
16184 
16185   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16186     switch (this->Subtarget.getCPUDirective()) {
16187     default:
16188       // TODO: enhance the condition for subtarget before pwr8
16189       return false;
16190     case PPC::DIR_PWR8:
16191       //  type        mul     add    shl
16192       // scalar        4       1      1
16193       // vector        7       2      2
16194       return true;
16195     case PPC::DIR_PWR9:
16196     case PPC::DIR_PWR10:
16197     case PPC::DIR_PWR_FUTURE:
16198       //  type        mul     add    shl
16199       // scalar        5       2      2
16200       // vector        7       2      2
16201 
16202       // The cycle RATIO of related operations are showed as a table above.
16203       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
16204       // scalar and vector type. For 2 instrs patterns, add/sub + shl
16205       // are 4, it is always profitable; but for 3 instrs patterns
16206       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
16207       // So we should only do it for vector type.
16208       return IsAddOne && IsNeg ? VT.isVector() : true;
16209     }
16210   };
16211 
16212   EVT VT = N->getValueType(0);
16213   SDLoc DL(N);
16214 
16215   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16216   bool IsNeg = MulAmt.isNegative();
16217   APInt MulAmtAbs = MulAmt.abs();
16218 
16219   if ((MulAmtAbs - 1).isPowerOf2()) {
16220     // (mul x, 2^N + 1) => (add (shl x, N), x)
16221     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16222 
16223     if (!IsProfitable(IsNeg, true, VT))
16224       return SDValue();
16225 
16226     SDValue Op0 = N->getOperand(0);
16227     SDValue Op1 =
16228         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16229                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16230     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16231 
16232     if (!IsNeg)
16233       return Res;
16234 
16235     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16236   } else if ((MulAmtAbs + 1).isPowerOf2()) {
16237     // (mul x, 2^N - 1) => (sub (shl x, N), x)
16238     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16239 
16240     if (!IsProfitable(IsNeg, false, VT))
16241       return SDValue();
16242 
16243     SDValue Op0 = N->getOperand(0);
16244     SDValue Op1 =
16245         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16246                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16247 
16248     if (!IsNeg)
16249       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16250     else
16251       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16252 
16253   } else {
16254     return SDValue();
16255   }
16256 }
16257 
16258 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16259 // in combiner since we need to check SD flags and other subtarget features.
16260 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16261                                           DAGCombinerInfo &DCI) const {
16262   SDValue N0 = N->getOperand(0);
16263   SDValue N1 = N->getOperand(1);
16264   SDValue N2 = N->getOperand(2);
16265   SDNodeFlags Flags = N->getFlags();
16266   EVT VT = N->getValueType(0);
16267   SelectionDAG &DAG = DCI.DAG;
16268   const TargetOptions &Options = getTargetMachine().Options;
16269   unsigned Opc = N->getOpcode();
16270   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16271   bool LegalOps = !DCI.isBeforeLegalizeOps();
16272   SDLoc Loc(N);
16273 
16274   if (!isOperationLegal(ISD::FMA, VT))
16275     return SDValue();
16276 
16277   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16278   // since (fnmsub a b c)=-0 while c-ab=+0.
16279   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16280     return SDValue();
16281 
16282   // (fma (fneg a) b c) => (fnmsub a b c)
16283   // (fnmsub (fneg a) b c) => (fma a b c)
16284   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16285     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16286 
16287   // (fma a (fneg b) c) => (fnmsub a b c)
16288   // (fnmsub a (fneg b) c) => (fma a b c)
16289   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16290     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16291 
16292   return SDValue();
16293 }
16294 
16295 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16296   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16297   if (!Subtarget.is64BitELFABI())
16298     return false;
16299 
16300   // If not a tail call then no need to proceed.
16301   if (!CI->isTailCall())
16302     return false;
16303 
16304   // If sibling calls have been disabled and tail-calls aren't guaranteed
16305   // there is no reason to duplicate.
16306   auto &TM = getTargetMachine();
16307   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16308     return false;
16309 
16310   // Can't tail call a function called indirectly, or if it has variadic args.
16311   const Function *Callee = CI->getCalledFunction();
16312   if (!Callee || Callee->isVarArg())
16313     return false;
16314 
16315   // Make sure the callee and caller calling conventions are eligible for tco.
16316   const Function *Caller = CI->getParent()->getParent();
16317   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16318                                            CI->getCallingConv()))
16319       return false;
16320 
16321   // If the function is local then we have a good chance at tail-calling it
16322   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16323 }
16324 
16325 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16326   if (!Subtarget.hasVSX())
16327     return false;
16328   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16329     return true;
16330   return VT == MVT::f32 || VT == MVT::f64 ||
16331     VT == MVT::v4f32 || VT == MVT::v2f64;
16332 }
16333 
16334 bool PPCTargetLowering::
16335 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16336   const Value *Mask = AndI.getOperand(1);
16337   // If the mask is suitable for andi. or andis. we should sink the and.
16338   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16339     // Can't handle constants wider than 64-bits.
16340     if (CI->getBitWidth() > 64)
16341       return false;
16342     int64_t ConstVal = CI->getZExtValue();
16343     return isUInt<16>(ConstVal) ||
16344       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16345   }
16346 
16347   // For non-constant masks, we can always use the record-form and.
16348   return true;
16349 }
16350 
16351 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16352 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16353 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16354 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16355 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16356 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16357   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16358   assert(Subtarget.hasP9Altivec() &&
16359          "Only combine this when P9 altivec supported!");
16360   EVT VT = N->getValueType(0);
16361   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16362     return SDValue();
16363 
16364   SelectionDAG &DAG = DCI.DAG;
16365   SDLoc dl(N);
16366   if (N->getOperand(0).getOpcode() == ISD::SUB) {
16367     // Even for signed integers, if it's known to be positive (as signed
16368     // integer) due to zero-extended inputs.
16369     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16370     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16371     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16372          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16373         (SubOpcd1 == ISD::ZERO_EXTEND ||
16374          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16375       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16376                          N->getOperand(0)->getOperand(0),
16377                          N->getOperand(0)->getOperand(1),
16378                          DAG.getTargetConstant(0, dl, MVT::i32));
16379     }
16380 
16381     // For type v4i32, it can be optimized with xvnegsp + vabsduw
16382     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16383         N->getOperand(0).hasOneUse()) {
16384       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16385                          N->getOperand(0)->getOperand(0),
16386                          N->getOperand(0)->getOperand(1),
16387                          DAG.getTargetConstant(1, dl, MVT::i32));
16388     }
16389   }
16390 
16391   return SDValue();
16392 }
16393 
16394 // For type v4i32/v8ii16/v16i8, transform
16395 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16396 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16397 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16398 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16399 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16400                                           DAGCombinerInfo &DCI) const {
16401   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16402   assert(Subtarget.hasP9Altivec() &&
16403          "Only combine this when P9 altivec supported!");
16404 
16405   SelectionDAG &DAG = DCI.DAG;
16406   SDLoc dl(N);
16407   SDValue Cond = N->getOperand(0);
16408   SDValue TrueOpnd = N->getOperand(1);
16409   SDValue FalseOpnd = N->getOperand(2);
16410   EVT VT = N->getOperand(1).getValueType();
16411 
16412   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16413       FalseOpnd.getOpcode() != ISD::SUB)
16414     return SDValue();
16415 
16416   // ABSD only available for type v4i32/v8i16/v16i8
16417   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16418     return SDValue();
16419 
16420   // At least to save one more dependent computation
16421   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16422     return SDValue();
16423 
16424   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16425 
16426   // Can only handle unsigned comparison here
16427   switch (CC) {
16428   default:
16429     return SDValue();
16430   case ISD::SETUGT:
16431   case ISD::SETUGE:
16432     break;
16433   case ISD::SETULT:
16434   case ISD::SETULE:
16435     std::swap(TrueOpnd, FalseOpnd);
16436     break;
16437   }
16438 
16439   SDValue CmpOpnd1 = Cond.getOperand(0);
16440   SDValue CmpOpnd2 = Cond.getOperand(1);
16441 
16442   // SETCC CmpOpnd1 CmpOpnd2 cond
16443   // TrueOpnd = CmpOpnd1 - CmpOpnd2
16444   // FalseOpnd = CmpOpnd2 - CmpOpnd1
16445   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16446       TrueOpnd.getOperand(1) == CmpOpnd2 &&
16447       FalseOpnd.getOperand(0) == CmpOpnd2 &&
16448       FalseOpnd.getOperand(1) == CmpOpnd1) {
16449     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16450                        CmpOpnd1, CmpOpnd2,
16451                        DAG.getTargetConstant(0, dl, MVT::i32));
16452   }
16453 
16454   return SDValue();
16455 }
16456