1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 static cl::opt<bool> EnablePPCPCRelTLS(
125     "enable-ppc-pcrel-tls",
126     cl::desc("enable the use of PC relative memops in TLS instructions on PPC"),
127     cl::Hidden);
128 
// Counters reported under -stats; presumably incremented by the lowering
// routines later in this file — confirm against the (unseen) remainder.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

// Forward declarations of static helpers defined elsewhere in this file.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// Option defined in another translation unit; referenced here to gate a
// workaround in the constructor below.
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
140 
141 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142                                      const PPCSubtarget &STI)
143     : TargetLowering(TM), Subtarget(STI) {
144   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145   // arguments are at least 4/8 bytes aligned.
146   bool isPPC64 = Subtarget.isPPC64();
147   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148 
149   // Set up the register classes.
150   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151   if (!useSoftFloat()) {
152     if (hasSPE()) {
153       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155     } else {
156       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158     }
159   }
160 
161   // Match BITREVERSE to customized fast code sequence in the td file.
162   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164 
165   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
166   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167 
168   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169   for (MVT VT : MVT::integer_valuetypes()) {
170     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172   }
173 
174   if (Subtarget.isISA3_0()) {
175     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179   } else {
180     // No extending loads from f16 or HW conversions back and forth.
181     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189   }
190 
191   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192 
193   // PowerPC has pre-inc load and store's.
194   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
204   if (!Subtarget.hasSPE()) {
205     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209   }
210 
211   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213   for (MVT VT : ScalarIntVTs) {
214     setOperationAction(ISD::ADDC, VT, Legal);
215     setOperationAction(ISD::ADDE, VT, Legal);
216     setOperationAction(ISD::SUBC, VT, Legal);
217     setOperationAction(ISD::SUBE, VT, Legal);
218   }
219 
220   if (Subtarget.useCRBits()) {
221     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222 
223     if (isPPC64 || Subtarget.hasFPCVT()) {
224       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
225       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
226                         isPPC64 ? MVT::i64 : MVT::i32);
227       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
228       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
229                         isPPC64 ? MVT::i64 : MVT::i32);
230 
231       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
232       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
233                          isPPC64 ? MVT::i64 : MVT::i32);
234       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
235       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
236                         isPPC64 ? MVT::i64 : MVT::i32);
237     } else {
238       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
239       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
240       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
241       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
242     }
243 
244     // PowerPC does not support direct load/store of condition registers.
245     setOperationAction(ISD::LOAD, MVT::i1, Custom);
246     setOperationAction(ISD::STORE, MVT::i1, Custom);
247 
248     // FIXME: Remove this once the ANDI glue bug is fixed:
249     if (ANDIGlueBug)
250       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
251 
252     for (MVT VT : MVT::integer_valuetypes()) {
253       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
254       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
255       setTruncStoreAction(VT, MVT::i1, Expand);
256     }
257 
258     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
259   }
260 
261   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
262   // PPC (the libcall is not available).
263   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
264   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
265   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
266   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
267 
268   // We do not currently implement these libm ops for PowerPC.
269   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
270   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
271   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
272   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
273   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
274   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
275 
276   // PowerPC has no SREM/UREM instructions unless we are on P9
277   // On P9 we may use a hardware instruction to compute the remainder.
278   // When the result of both the remainder and the division is required it is
279   // more efficient to compute the remainder from the result of the division
280   // rather than use the remainder instruction. The instructions are legalized
281   // directly because the DivRemPairsPass performs the transformation at the IR
282   // level.
283   if (Subtarget.isISA3_0()) {
284     setOperationAction(ISD::SREM, MVT::i32, Legal);
285     setOperationAction(ISD::UREM, MVT::i32, Legal);
286     setOperationAction(ISD::SREM, MVT::i64, Legal);
287     setOperationAction(ISD::UREM, MVT::i64, Legal);
288   } else {
289     setOperationAction(ISD::SREM, MVT::i32, Expand);
290     setOperationAction(ISD::UREM, MVT::i32, Expand);
291     setOperationAction(ISD::SREM, MVT::i64, Expand);
292     setOperationAction(ISD::UREM, MVT::i64, Expand);
293   }
294 
295   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
298   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
299   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
300   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
301   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
302   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
304 
305   // Handle constrained floating-point operations of scalar.
306   // TODO: Handle SPE specific operation.
307   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
308   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
309   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
310   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
311   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
312   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
313 
314   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
315   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
316   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
317   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
318   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
319   if (Subtarget.hasVSX()) {
320     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
321     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
322   }
323 
324   if (Subtarget.hasFSQRT()) {
325     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
326     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
327   }
328 
329   if (Subtarget.hasFPRND()) {
330     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
331     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
332     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
333     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
334 
335     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
336     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
337     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
338     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
339   }
340 
341   // We don't support sin/cos/sqrt/fmod/pow
342   setOperationAction(ISD::FSIN , MVT::f64, Expand);
343   setOperationAction(ISD::FCOS , MVT::f64, Expand);
344   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
345   setOperationAction(ISD::FREM , MVT::f64, Expand);
346   setOperationAction(ISD::FPOW , MVT::f64, Expand);
347   setOperationAction(ISD::FSIN , MVT::f32, Expand);
348   setOperationAction(ISD::FCOS , MVT::f32, Expand);
349   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
350   setOperationAction(ISD::FREM , MVT::f32, Expand);
351   setOperationAction(ISD::FPOW , MVT::f32, Expand);
352   if (Subtarget.hasSPE()) {
353     setOperationAction(ISD::FMA  , MVT::f64, Expand);
354     setOperationAction(ISD::FMA  , MVT::f32, Expand);
355   } else {
356     setOperationAction(ISD::FMA  , MVT::f64, Legal);
357     setOperationAction(ISD::FMA  , MVT::f32, Legal);
358   }
359 
360   if (Subtarget.hasSPE())
361     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
362 
363   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
364 
365   // If we're enabling GP optimizations, use hardware square root
366   if (!Subtarget.hasFSQRT() &&
367       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
368         Subtarget.hasFRE()))
369     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
370 
371   if (!Subtarget.hasFSQRT() &&
372       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
373         Subtarget.hasFRES()))
374     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
375 
376   if (Subtarget.hasFCPSGN()) {
377     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
378     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
379   } else {
380     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
381     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
382   }
383 
384   if (Subtarget.hasFPRND()) {
385     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
386     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
387     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
388     setOperationAction(ISD::FROUND, MVT::f64, Legal);
389 
390     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
391     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
392     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
393     setOperationAction(ISD::FROUND, MVT::f32, Legal);
394   }
395 
396   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
397   // to speed up scalar BSWAP64.
398   // CTPOP or CTTZ were introduced in P8/P9 respectively
399   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
400   if (Subtarget.hasP9Vector())
401     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
402   else
403     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
404   if (Subtarget.isISA3_0()) {
405     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
406     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
407   } else {
408     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
409     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
410   }
411 
412   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
413     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
414     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
415   } else {
416     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
417     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
418   }
419 
420   // PowerPC does not have ROTR
421   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
422   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
423 
424   if (!Subtarget.useCRBits()) {
425     // PowerPC does not have Select
426     setOperationAction(ISD::SELECT, MVT::i32, Expand);
427     setOperationAction(ISD::SELECT, MVT::i64, Expand);
428     setOperationAction(ISD::SELECT, MVT::f32, Expand);
429     setOperationAction(ISD::SELECT, MVT::f64, Expand);
430   }
431 
432   // PowerPC wants to turn select_cc of FP into fsel when possible.
433   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435 
436   // PowerPC wants to optimize integer setcc a bit
437   if (!Subtarget.useCRBits())
438     setOperationAction(ISD::SETCC, MVT::i32, Custom);
439 
440   if (Subtarget.hasFPU()) {
441     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
442     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
443     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
444 
445     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
446     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
447     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
448   }
449 
450   // PowerPC does not have BRCOND which requires SetCC
451   if (!Subtarget.useCRBits())
452     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
453 
454   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
455 
456   if (Subtarget.hasSPE()) {
457     // SPE has built-in conversions
458     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
459     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
460     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
461     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
462     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
463     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
464   } else {
465     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
466     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
467     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
468 
469     // PowerPC does not have [U|S]INT_TO_FP
470     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
471     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
472     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
473     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
474   }
475 
476   if (Subtarget.hasDirectMove() && isPPC64) {
477     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
478     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
479     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
480     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
481     if (TM.Options.UnsafeFPMath) {
482       setOperationAction(ISD::LRINT, MVT::f64, Legal);
483       setOperationAction(ISD::LRINT, MVT::f32, Legal);
484       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
485       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
486       setOperationAction(ISD::LROUND, MVT::f64, Legal);
487       setOperationAction(ISD::LROUND, MVT::f32, Legal);
488       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
489       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
490     }
491   } else {
492     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
493     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
494     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
495     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
496   }
497 
498   // We cannot sextinreg(i1).  Expand to shifts.
499   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
500 
501   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
502   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
503   // support continuation, user-level threading, and etc.. As a result, no
504   // other SjLj exception interfaces are implemented and please don't build
505   // your own exception handling based on them.
506   // LLVM/Clang supports zero-cost DWARF exception handling.
507   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
508   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
509 
510   // We want to legalize GlobalAddress and ConstantPool nodes into the
511   // appropriate instructions to materialize the address.
512   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
513   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
514   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
515   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
516   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
517   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
518   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
519   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
520   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
521   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
522 
523   // TRAP is legal.
524   setOperationAction(ISD::TRAP, MVT::Other, Legal);
525 
526   // TRAMPOLINE is custom lowered.
527   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
528   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
529 
530   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
531   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
532 
533   if (Subtarget.is64BitELFABI()) {
534     // VAARG always uses double-word chunks, so promote anything smaller.
535     setOperationAction(ISD::VAARG, MVT::i1, Promote);
536     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
537     setOperationAction(ISD::VAARG, MVT::i8, Promote);
538     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
539     setOperationAction(ISD::VAARG, MVT::i16, Promote);
540     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
541     setOperationAction(ISD::VAARG, MVT::i32, Promote);
542     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
543     setOperationAction(ISD::VAARG, MVT::Other, Expand);
544   } else if (Subtarget.is32BitELFABI()) {
545     // VAARG is custom lowered with the 32-bit SVR4 ABI.
546     setOperationAction(ISD::VAARG, MVT::Other, Custom);
547     setOperationAction(ISD::VAARG, MVT::i64, Custom);
548   } else
549     setOperationAction(ISD::VAARG, MVT::Other, Expand);
550 
551   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
552   if (Subtarget.is32BitELFABI())
553     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
554   else
555     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
556 
557   // Use the default implementation.
558   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
559   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
560   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
561   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
562   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
563   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
564   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
565   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
566   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
567 
568   // We want to custom lower some of our intrinsics.
569   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
570 
571   // To handle counter-based loop conditions.
572   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
573 
574   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
575   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
576   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
577   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
578 
579   // Comparisons that require checking two conditions.
580   if (Subtarget.hasSPE()) {
581     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
582     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
583     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
584     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
585   }
586   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
587   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
588   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
589   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
590   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
591   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
592   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
593   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
594   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
595   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
596   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
597   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
598 
599   if (Subtarget.has64BitSupport()) {
600     // They also have instructions for converting between i64 and fp.
601     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
602     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
603     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
604     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
605     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
606     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
607     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
608     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
609     // This is just the low 32 bits of a (signed) fp->i64 conversion.
610     // We cannot do this with Promote because i64 is not a legal type.
611     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
612     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
613 
614     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
615       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
616       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
617     }
618   } else {
619     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
620     if (Subtarget.hasSPE()) {
621       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
622       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
623     } else {
624       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
625       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
626     }
627   }
628 
629   // With the instructions enabled under FPCVT, we can do everything.
630   if (Subtarget.hasFPCVT()) {
631     if (Subtarget.has64BitSupport()) {
632       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
633       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
634       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
635       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
636       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
637       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
638       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
639       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
640     }
641 
642     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
643     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
644     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
645     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
646     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
647     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
648     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
649     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
650   }
651 
652   if (Subtarget.use64BitRegs()) {
653     // 64-bit PowerPC implementations can support i64 types directly
654     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
655     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
656     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
657     // 64-bit PowerPC wants to expand i128 shifts itself.
658     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
659     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
660     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
661   } else {
662     // 32-bit PowerPC wants to expand i64 shifts itself.
663     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
664     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
665     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
666   }
667 
668   // PowerPC has better expansions for funnel shifts than the generic
669   // TargetLowering::expandFunnelShift.
670   if (Subtarget.has64BitSupport()) {
671     setOperationAction(ISD::FSHL, MVT::i64, Custom);
672     setOperationAction(ISD::FSHR, MVT::i64, Custom);
673   }
674   setOperationAction(ISD::FSHL, MVT::i32, Custom);
675   setOperationAction(ISD::FSHR, MVT::i32, Custom);
676 
677   if (Subtarget.hasVSX()) {
678     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
679     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
680     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
681     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
682   }
683 
684   if (Subtarget.hasAltivec()) {
685     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
686       setOperationAction(ISD::SADDSAT, VT, Legal);
687       setOperationAction(ISD::SSUBSAT, VT, Legal);
688       setOperationAction(ISD::UADDSAT, VT, Legal);
689       setOperationAction(ISD::USUBSAT, VT, Legal);
690     }
691     // First set operation action for all vector types to expand. Then we
692     // will selectively turn on ones that can be effectively codegen'd.
693     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
694       // add/sub are legal for all supported vector VT's.
695       setOperationAction(ISD::ADD, VT, Legal);
696       setOperationAction(ISD::SUB, VT, Legal);
697 
698       // For v2i64, these are only valid with P8Vector. This is corrected after
699       // the loop.
700       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
701         setOperationAction(ISD::SMAX, VT, Legal);
702         setOperationAction(ISD::SMIN, VT, Legal);
703         setOperationAction(ISD::UMAX, VT, Legal);
704         setOperationAction(ISD::UMIN, VT, Legal);
705       }
706       else {
707         setOperationAction(ISD::SMAX, VT, Expand);
708         setOperationAction(ISD::SMIN, VT, Expand);
709         setOperationAction(ISD::UMAX, VT, Expand);
710         setOperationAction(ISD::UMIN, VT, Expand);
711       }
712 
713       if (Subtarget.hasVSX()) {
714         setOperationAction(ISD::FMAXNUM, VT, Legal);
715         setOperationAction(ISD::FMINNUM, VT, Legal);
716       }
717 
718       // Vector instructions introduced in P8
719       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
720         setOperationAction(ISD::CTPOP, VT, Legal);
721         setOperationAction(ISD::CTLZ, VT, Legal);
722       }
723       else {
724         setOperationAction(ISD::CTPOP, VT, Expand);
725         setOperationAction(ISD::CTLZ, VT, Expand);
726       }
727 
728       // Vector instructions introduced in P9
729       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
730         setOperationAction(ISD::CTTZ, VT, Legal);
731       else
732         setOperationAction(ISD::CTTZ, VT, Expand);
733 
734       // We promote all shuffles to v16i8.
735       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
736       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
737 
738       // We promote all non-typed operations to v4i32.
739       setOperationAction(ISD::AND   , VT, Promote);
740       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
741       setOperationAction(ISD::OR    , VT, Promote);
742       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
743       setOperationAction(ISD::XOR   , VT, Promote);
744       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
745       setOperationAction(ISD::LOAD  , VT, Promote);
746       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
747       setOperationAction(ISD::SELECT, VT, Promote);
748       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
749       setOperationAction(ISD::VSELECT, VT, Legal);
750       setOperationAction(ISD::SELECT_CC, VT, Promote);
751       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
752       setOperationAction(ISD::STORE, VT, Promote);
753       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
754 
755       // No other operations are legal.
756       setOperationAction(ISD::MUL , VT, Expand);
757       setOperationAction(ISD::SDIV, VT, Expand);
758       setOperationAction(ISD::SREM, VT, Expand);
759       setOperationAction(ISD::UDIV, VT, Expand);
760       setOperationAction(ISD::UREM, VT, Expand);
761       setOperationAction(ISD::FDIV, VT, Expand);
762       setOperationAction(ISD::FREM, VT, Expand);
763       setOperationAction(ISD::FNEG, VT, Expand);
764       setOperationAction(ISD::FSQRT, VT, Expand);
765       setOperationAction(ISD::FLOG, VT, Expand);
766       setOperationAction(ISD::FLOG10, VT, Expand);
767       setOperationAction(ISD::FLOG2, VT, Expand);
768       setOperationAction(ISD::FEXP, VT, Expand);
769       setOperationAction(ISD::FEXP2, VT, Expand);
770       setOperationAction(ISD::FSIN, VT, Expand);
771       setOperationAction(ISD::FCOS, VT, Expand);
772       setOperationAction(ISD::FABS, VT, Expand);
773       setOperationAction(ISD::FFLOOR, VT, Expand);
774       setOperationAction(ISD::FCEIL,  VT, Expand);
775       setOperationAction(ISD::FTRUNC, VT, Expand);
776       setOperationAction(ISD::FRINT,  VT, Expand);
777       setOperationAction(ISD::FNEARBYINT, VT, Expand);
778       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
779       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
781       setOperationAction(ISD::MULHU, VT, Expand);
782       setOperationAction(ISD::MULHS, VT, Expand);
783       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
784       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785       setOperationAction(ISD::UDIVREM, VT, Expand);
786       setOperationAction(ISD::SDIVREM, VT, Expand);
787       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
788       setOperationAction(ISD::FPOW, VT, Expand);
789       setOperationAction(ISD::BSWAP, VT, Expand);
790       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
791       setOperationAction(ISD::ROTL, VT, Expand);
792       setOperationAction(ISD::ROTR, VT, Expand);
793 
794       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
795         setTruncStoreAction(VT, InnerVT, Expand);
796         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
797         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
798         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
799       }
800     }
801     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
802     if (!Subtarget.hasP8Vector()) {
803       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
804       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
805       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
806       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
807     }
808 
809     for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
810       setOperationAction(ISD::ABS, VT, Custom);
811 
812     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
813     // with merges, splats, etc.
814     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
815 
816     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
817     // are cheap, so handle them before they get expanded to scalar.
818     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
819     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
820     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
821     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
822     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
823 
824     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
825     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
826     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
827     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
828     setOperationAction(ISD::SELECT, MVT::v4i32,
829                        Subtarget.useCRBits() ? Legal : Expand);
830     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
831     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
832     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
833     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
834     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
835     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
836     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
837     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
838     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
839     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
840     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
841     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
842     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
843 
844     // Without hasP8Altivec set, v2i64 SMAX isn't available.
845     // But ABS custom lowering requires SMAX support.
846     if (!Subtarget.hasP8Altivec())
847       setOperationAction(ISD::ABS, MVT::v2i64, Expand);
848 
849     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
850     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
851     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
852     if (Subtarget.hasAltivec())
853       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
854         setOperationAction(ISD::ROTL, VT, Legal);
855     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
856     if (Subtarget.hasP8Altivec())
857       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
858 
859     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
860     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
861     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
862     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
863 
864     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
865     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
866 
867     if (Subtarget.hasVSX()) {
868       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
869       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
870     }
871 
872     if (Subtarget.hasP8Altivec())
873       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
874     else
875       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
876 
877     if (Subtarget.isISA3_1()) {
878       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
879       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
880       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
881       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
882       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
883       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
884       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
885       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
886       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
887       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
888       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
889       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
890       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
891       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
892       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
893       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
894       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
895     }
896 
897     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
898     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
899 
900     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
901     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
902 
903     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
904     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
905     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
906     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
907 
908     // Altivec does not contain unordered floating-point compare instructions
909     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
910     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
911     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
912     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
913 
914     if (Subtarget.hasVSX()) {
915       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
916       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
917       if (Subtarget.hasP8Vector()) {
918         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
919         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
920       }
921       if (Subtarget.hasDirectMove() && isPPC64) {
922         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
923         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
924         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
925         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
926         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
927         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
928         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
929         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
930       }
931       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
932 
933       // The nearbyint variants are not allowed to raise the inexact exception
934       // so we can only code-gen them with unsafe math.
935       if (TM.Options.UnsafeFPMath) {
936         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
937         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
938       }
939 
940       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
941       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
942       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
943       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
944       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
945       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
946       setOperationAction(ISD::FROUND, MVT::f64, Legal);
947       setOperationAction(ISD::FRINT, MVT::f64, Legal);
948 
949       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
950       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
951       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
952       setOperationAction(ISD::FROUND, MVT::f32, Legal);
953       setOperationAction(ISD::FRINT, MVT::f32, Legal);
954 
955       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
956       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
957 
958       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
959       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
960 
961       // Share the Altivec comparison restrictions.
962       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
963       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
964       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
965       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
966 
967       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
968       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
969 
970       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
971 
972       if (Subtarget.hasP8Vector())
973         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
974 
975       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
976 
977       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
978       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
979       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
980 
981       if (Subtarget.hasP8Altivec()) {
982         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
983         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
984         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
985 
        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
990         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
991         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
992         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
993 
994         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
995       }
996       else {
997         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
998         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
999         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1000 
1001         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1002 
1003         // VSX v2i64 only supports non-arithmetic operations.
1004         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1005         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1006       }
1007 
1008       if (Subtarget.isISA3_1())
1009         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1010       else
1011         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1012 
1013       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1014       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1015       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1016       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1017 
1018       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1019 
1020       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1021       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1022       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1023       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1024       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1025       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1026       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1027       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1028 
1029       // Custom handling for partial vectors of integers converted to
1030       // floating point. We already have optimal handling for v2i32 through
1031       // the DAG combine, so those aren't necessary.
1032       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1033       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1034       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1035       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1036       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1037       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1038       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1039       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1040       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1041       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1042       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1043       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1044       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1045       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1046       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1047       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1048 
1049       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1050       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1051       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1052       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1053       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1054       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1055 
1056       if (Subtarget.hasDirectMove())
1057         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1058       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1059 
      // Handle constrained floating-point operations on vectors.
      // The predicate is `hasVSX` because Altivec instructions cannot raise
      // floating-point exceptions, while VSX vector instructions can.
1063       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1064       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1065       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1066       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1067       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1068       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1069       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1070       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1071       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1072       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1073       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1074       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1075       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1076 
1077       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1078       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1079       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1080       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1081       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1082       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1083       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1084       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1085       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1086       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1087       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1088       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1089       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1090 
1091       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1092     }
1093 
1094     if (Subtarget.hasP8Altivec()) {
1095       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1096       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1097     }
1098 
1099     if (Subtarget.hasP9Vector()) {
1100       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1101       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1102 
1103       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1104       // SRL, but not for SRA because of the instructions available:
1105       // VS{RL} and VS{RL}O.
1106       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1107       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1108       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1109 
1110       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1111       setOperationAction(ISD::FADD, MVT::f128, Legal);
1112       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1113       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1114       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1115       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1116       // No extending loads to f128 on PPC.
1117       for (MVT FPT : MVT::fp_valuetypes())
1118         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1119       setOperationAction(ISD::FMA, MVT::f128, Legal);
1120       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1121       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1122       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1123       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1124       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1125       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1126 
1127       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1128       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1129       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1130       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1131       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1132       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1133 
1134       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1135       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1136       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1137       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1138       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1139       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1140       // No implementation for these ops for PowerPC.
1141       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1142       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1143       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1144       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1145       setOperationAction(ISD::FREM, MVT::f128, Expand);
1146 
1147       // Handle constrained floating-point operations of fp128
1148       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1149       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1150       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1151       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1152       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1153       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1154       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1155       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1156       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1157       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1158       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1159       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1160       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1161       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1162       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1163       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1164       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1165       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1166       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1167       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1168     }
1169 
1170     if (Subtarget.hasP9Altivec()) {
1171       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1172       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1173 
1174       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1175       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1176       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1177       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1178       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1179       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1180       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1181     }
1182   }
1183 
1184   if (Subtarget.has64BitSupport())
1185     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1186 
1187   if (Subtarget.isISA3_1())
1188     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1189 
1190   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1191 
1192   if (!isPPC64) {
1193     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1194     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1195   }
1196 
1197   setBooleanContents(ZeroOrOneBooleanContent);
1198 
1199   if (Subtarget.hasAltivec()) {
1200     // Altivec instructions set fields to all zeros or all ones.
1201     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1202   }
1203 
1204   if (!isPPC64) {
1205     // These libcalls are not available in 32-bit.
1206     setLibcallName(RTLIB::SHL_I128, nullptr);
1207     setLibcallName(RTLIB::SRL_I128, nullptr);
1208     setLibcallName(RTLIB::SRA_I128, nullptr);
1209   }
1210 
1211   if (!isPPC64)
1212     setMaxAtomicSizeInBitsSupported(32);
1213 
1214   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1215 
1216   // We have target-specific dag combine patterns for the following nodes:
1217   setTargetDAGCombine(ISD::ADD);
1218   setTargetDAGCombine(ISD::SHL);
1219   setTargetDAGCombine(ISD::SRA);
1220   setTargetDAGCombine(ISD::SRL);
1221   setTargetDAGCombine(ISD::MUL);
1222   setTargetDAGCombine(ISD::FMA);
1223   setTargetDAGCombine(ISD::SINT_TO_FP);
1224   setTargetDAGCombine(ISD::BUILD_VECTOR);
1225   if (Subtarget.hasFPCVT())
1226     setTargetDAGCombine(ISD::UINT_TO_FP);
1227   setTargetDAGCombine(ISD::LOAD);
1228   setTargetDAGCombine(ISD::STORE);
1229   setTargetDAGCombine(ISD::BR_CC);
1230   if (Subtarget.useCRBits())
1231     setTargetDAGCombine(ISD::BRCOND);
1232   setTargetDAGCombine(ISD::BSWAP);
1233   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1234   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1235   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1236 
1237   setTargetDAGCombine(ISD::SIGN_EXTEND);
1238   setTargetDAGCombine(ISD::ZERO_EXTEND);
1239   setTargetDAGCombine(ISD::ANY_EXTEND);
1240 
1241   setTargetDAGCombine(ISD::TRUNCATE);
1242   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1243 
1244 
1245   if (Subtarget.useCRBits()) {
1246     setTargetDAGCombine(ISD::TRUNCATE);
1247     setTargetDAGCombine(ISD::SETCC);
1248     setTargetDAGCombine(ISD::SELECT_CC);
1249   }
1250 
1251   if (Subtarget.hasP9Altivec()) {
1252     setTargetDAGCombine(ISD::ABS);
1253     setTargetDAGCombine(ISD::VSELECT);
1254   }
1255 
1256   setLibcallName(RTLIB::LOG_F128, "logf128");
1257   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1258   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1259   setLibcallName(RTLIB::EXP_F128, "expf128");
1260   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1261   setLibcallName(RTLIB::SIN_F128, "sinf128");
1262   setLibcallName(RTLIB::COS_F128, "cosf128");
1263   setLibcallName(RTLIB::POW_F128, "powf128");
1264   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1265   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1266   setLibcallName(RTLIB::POWI_F128, "__powikf2");
1267   setLibcallName(RTLIB::REM_F128, "fmodf128");
1268 
1269   // With 32 condition bits, we don't need to sink (and duplicate) compares
1270   // aggressively in CodeGenPrep.
1271   if (Subtarget.useCRBits()) {
1272     setHasMultipleConditionRegisters();
1273     setJumpIsExpensive();
1274   }
1275 
1276   setMinFunctionAlignment(Align(4));
1277 
1278   switch (Subtarget.getCPUDirective()) {
1279   default: break;
1280   case PPC::DIR_970:
1281   case PPC::DIR_A2:
1282   case PPC::DIR_E500:
1283   case PPC::DIR_E500mc:
1284   case PPC::DIR_E5500:
1285   case PPC::DIR_PWR4:
1286   case PPC::DIR_PWR5:
1287   case PPC::DIR_PWR5X:
1288   case PPC::DIR_PWR6:
1289   case PPC::DIR_PWR6X:
1290   case PPC::DIR_PWR7:
1291   case PPC::DIR_PWR8:
1292   case PPC::DIR_PWR9:
1293   case PPC::DIR_PWR10:
1294   case PPC::DIR_PWR_FUTURE:
1295     setPrefLoopAlignment(Align(16));
1296     setPrefFunctionAlignment(Align(16));
1297     break;
1298   }
1299 
1300   if (Subtarget.enableMachineScheduler())
1301     setSchedulingPreference(Sched::Source);
1302   else
1303     setSchedulingPreference(Sched::Hybrid);
1304 
1305   computeRegisterProperties(STI.getRegisterInfo());
1306 
  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1309   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1310       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1311     MaxStoresPerMemset = 32;
1312     MaxStoresPerMemsetOptSize = 16;
1313     MaxStoresPerMemcpy = 32;
1314     MaxStoresPerMemcpyOptSize = 8;
1315     MaxStoresPerMemmove = 32;
1316     MaxStoresPerMemmoveOptSize = 8;
1317   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
1321     MaxStoresPerMemset = 128;
1322     MaxStoresPerMemcpy = 128;
1323     MaxStoresPerMemmove = 128;
1324     MaxLoadsPerMemcmp = 128;
1325   } else {
1326     MaxLoadsPerMemcmp = 8;
1327     MaxLoadsPerMemcmpOptSize = 4;
1328   }
1329 
1330   IsStrictFPEnabled = true;
1331 
1332   // Let the subtarget (CPU) decide if a predictable select is more expensive
1333   // than the corresponding branch. This information is used in CGP to decide
1334   // when to convert selects into branches.
1335   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1336 }
1337 
1338 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1339 /// the desired ByVal argument alignment.
1340 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1341   if (MaxAlign == MaxMaxAlign)
1342     return;
1343   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1344     if (MaxMaxAlign >= 32 &&
1345         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1346       MaxAlign = Align(32);
1347     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1348              MaxAlign < 16)
1349       MaxAlign = Align(16);
1350   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1351     Align EltAlign;
1352     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1353     if (EltAlign > MaxAlign)
1354       MaxAlign = EltAlign;
1355   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1356     for (auto *EltTy : STy->elements()) {
1357       Align EltAlign;
1358       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1359       if (EltAlign > MaxAlign)
1360         MaxAlign = EltAlign;
1361       if (MaxAlign == MaxMaxAlign)
1362         break;
1363     }
1364   }
1365 }
1366 
1367 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1368 /// function arguments in the caller parameter area.
1369 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1370                                                   const DataLayout &DL) const {
1371   // 16byte and wider vectors are passed on 16byte boundary.
1372   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1373   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1374   if (Subtarget.hasAltivec())
1375     getMaxByValAlign(Ty, Alignment, Align(16));
1376   return Alignment.value();
1377 }
1378 
// Delegate to the subtarget: true when floating point is emulated in
// software rather than using hardware FP registers.
bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
1382 
// Delegate to the subtarget: true when the target supports the Signal
// Processing Engine (SPE).
bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}
1386 
// Prefer the (add (add X, Y), 1) form over (sub (not X), Y)-style rewrites
// only for scalar integer types; vector types keep the sub-of-not form.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}
1390 
1391 /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific
1392 /// type is cheaper than a multiply followed by a shift.
1393 /// This is true for words and doublewords on 64-bit PowerPC.
1394 bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const {
1395   if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) ||
1396                               isOperationLegal(ISD::MULHU, Type)))
1397     return true;
1398   return TargetLowering::isMulhCheaperThanMulShift(Type);
1399 }
1400 
1401 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1402   switch ((PPCISD::NodeType)Opcode) {
1403   case PPCISD::FIRST_NUMBER:    break;
1404   case PPCISD::FSEL:            return "PPCISD::FSEL";
1405   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1406   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1407   case PPCISD::FCFID:           return "PPCISD::FCFID";
1408   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1409   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1410   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1411   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1412   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1413   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1414   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1415   case PPCISD::FP_TO_UINT_IN_VSR:
1416                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1417   case PPCISD::FP_TO_SINT_IN_VSR:
1418                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1419   case PPCISD::FRE:             return "PPCISD::FRE";
1420   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1421   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1422   case PPCISD::VPERM:           return "PPCISD::VPERM";
1423   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1424   case PPCISD::XXSPLTI_SP_TO_DP:
1425     return "PPCISD::XXSPLTI_SP_TO_DP";
1426   case PPCISD::XXSPLTI32DX:
1427     return "PPCISD::XXSPLTI32DX";
1428   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1429   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1430   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1431   case PPCISD::CMPB:            return "PPCISD::CMPB";
1432   case PPCISD::Hi:              return "PPCISD::Hi";
1433   case PPCISD::Lo:              return "PPCISD::Lo";
1434   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1435   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1436   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1437   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1438   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1439   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1440   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1441   case PPCISD::SRL:             return "PPCISD::SRL";
1442   case PPCISD::SRA:             return "PPCISD::SRA";
1443   case PPCISD::SHL:             return "PPCISD::SHL";
1444   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1445   case PPCISD::CALL:            return "PPCISD::CALL";
1446   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1447   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1448   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1449   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1450   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1451   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1452   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1453   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1454   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1455   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1456   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1457   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1458   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1459   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1460   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1461   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1462     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1463   case PPCISD::ANDI_rec_1_EQ_BIT:
1464     return "PPCISD::ANDI_rec_1_EQ_BIT";
1465   case PPCISD::ANDI_rec_1_GT_BIT:
1466     return "PPCISD::ANDI_rec_1_GT_BIT";
1467   case PPCISD::VCMP:            return "PPCISD::VCMP";
1468   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
1469   case PPCISD::LBRX:            return "PPCISD::LBRX";
1470   case PPCISD::STBRX:           return "PPCISD::STBRX";
1471   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1472   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1473   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1474   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1475   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1476   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1477   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1478   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1479   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1480   case PPCISD::ST_VSR_SCAL_INT:
1481                                 return "PPCISD::ST_VSR_SCAL_INT";
1482   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1483   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1484   case PPCISD::BDZ:             return "PPCISD::BDZ";
1485   case PPCISD::MFFS:            return "PPCISD::MFFS";
1486   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1487   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1488   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1489   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1490   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1491   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1492   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1493   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1494   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1495   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1496   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1497   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1498   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1499   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1500   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1501   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1502   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1503   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1504   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1505   case PPCISD::PADDI_DTPREL:
1506     return "PPCISD::PADDI_DTPREL";
1507   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1508   case PPCISD::SC:              return "PPCISD::SC";
1509   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1510   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1511   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1512   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1513   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1514   case PPCISD::VABSD:           return "PPCISD::VABSD";
1515   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1516   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1517   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1518   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1519   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1520   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1521   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1522   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1523     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1524   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1525     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1526   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1527   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1528   case PPCISD::STRICT_FADDRTZ:
1529     return "PPCISD::STRICT_FADDRTZ";
1530   case PPCISD::STRICT_FCTIDZ:
1531     return "PPCISD::STRICT_FCTIDZ";
1532   case PPCISD::STRICT_FCTIWZ:
1533     return "PPCISD::STRICT_FCTIWZ";
1534   case PPCISD::STRICT_FCTIDUZ:
1535     return "PPCISD::STRICT_FCTIDUZ";
1536   case PPCISD::STRICT_FCTIWUZ:
1537     return "PPCISD::STRICT_FCTIWUZ";
1538   case PPCISD::STRICT_FCFID:
1539     return "PPCISD::STRICT_FCFID";
1540   case PPCISD::STRICT_FCFIDU:
1541     return "PPCISD::STRICT_FCFIDU";
1542   case PPCISD::STRICT_FCFIDS:
1543     return "PPCISD::STRICT_FCFIDS";
1544   case PPCISD::STRICT_FCFIDUS:
1545     return "PPCISD::STRICT_FCFIDUS";
1546   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1547   }
1548   return nullptr;
1549 }
1550 
1551 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1552                                           EVT VT) const {
1553   if (!VT.isVector())
1554     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1555 
1556   return VT.changeVectorElementTypeToInteger();
1557 }
1558 
// Allow the combiner to aggressively form FMA nodes for any floating-point
// type on PPC.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
1563 
1564 //===----------------------------------------------------------------------===//
1565 // Node matching predicates, for use by the tblgen matching code.
1566 //===----------------------------------------------------------------------===//
1567 
1568 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1569 static bool isFloatingPointZero(SDValue Op) {
1570   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1571     return CFP->getValueAPF().isZero();
1572   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1573     // Maybe this has already been legalized into the constant pool?
1574     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1575       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1576         return CFP->getValueAPF().isZero();
1577   }
1578   return false;
1579 }
1580 
1581 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1582 /// true if Op is undef or if it matches the specified value.
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Negative mask elements denote undef, which matches anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
1586 
1587 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1588 /// VPKUHUM instruction.
1589 /// The ShuffleKind distinguishes between big-endian operations with
1590 /// two different inputs (0), either-endian operations with two identical
1591 /// inputs (1), and little-endian operations with two different inputs (2).
1592 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1593 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1594                                SelectionDAG &DAG) {
1595   bool IsLE = DAG.getDataLayout().isLittleEndian();
1596   if (ShuffleKind == 0) {
1597     if (IsLE)
1598       return false;
1599     for (unsigned i = 0; i != 16; ++i)
1600       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1601         return false;
1602   } else if (ShuffleKind == 2) {
1603     if (!IsLE)
1604       return false;
1605     for (unsigned i = 0; i != 16; ++i)
1606       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1607         return false;
1608   } else if (ShuffleKind == 1) {
1609     unsigned j = IsLE ? 0 : 1;
1610     for (unsigned i = 0; i != 8; ++i)
1611       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1612           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1613         return false;
1614   }
1615   return true;
1616 }
1617 
1618 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1619 /// VPKUWUM instruction.
1620 /// The ShuffleKind distinguishes between big-endian operations with
1621 /// two different inputs (0), either-endian operations with two identical
1622 /// inputs (1), and little-endian operations with two different inputs (2).
1623 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1624 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1625                                SelectionDAG &DAG) {
1626   bool IsLE = DAG.getDataLayout().isLittleEndian();
1627   if (ShuffleKind == 0) {
1628     if (IsLE)
1629       return false;
1630     for (unsigned i = 0; i != 16; i += 2)
1631       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1632           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1633         return false;
1634   } else if (ShuffleKind == 2) {
1635     if (!IsLE)
1636       return false;
1637     for (unsigned i = 0; i != 16; i += 2)
1638       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1639           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1640         return false;
1641   } else if (ShuffleKind == 1) {
1642     unsigned j = IsLE ? 0 : 2;
1643     for (unsigned i = 0; i != 8; i += 2)
1644       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1645           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1646           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1647           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1648         return false;
1649   }
1650   return true;
1651 }
1652 
1653 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1654 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1655 /// current subtarget.
1656 ///
1657 /// The ShuffleKind distinguishes between big-endian operations with
1658 /// two different inputs (0), either-endian operations with two identical
1659 /// inputs (1), and little-endian operations with two different inputs (2).
1660 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  // The vpkudum instruction requires POWER8 vector support.
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    // Big-endian, two distinct inputs: each word of the result comes from
    // bytes 4..7 of the corresponding 8-byte unit.
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    // Little-endian, swapped inputs: each word comes from bytes 0..3 of the
    // corresponding 8-byte unit.
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both halves of the result must select the same words from the
    // single input; the byte offset within the doubleword depends on
    // endianness.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
1702 
1703 /// isVMerge - Common function, used to match vmrg* shuffles.
1704 ///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  // Only v16i8 shuffles are matched here.
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  // A merge interleaves units from the two inputs: the result alternates one
  // UnitSize-byte unit from the LHS (consecutive from LHSStart) with one from
  // the RHS (consecutive from RHSStart).
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
1722 
1723 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1724 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1725 /// The ShuffleKind distinguishes between big-endian merges with two
1726 /// different inputs (0), either-endian merges with two identical inputs (1),
1727 /// and little-endian merges with two different inputs (2).  For the latter,
1728 /// the input operands are swapped (see PPCInstrAltivec.td).
1729 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1730                              unsigned ShuffleKind, SelectionDAG &DAG) {
1731   if (DAG.getDataLayout().isLittleEndian()) {
1732     if (ShuffleKind == 1) // unary
1733       return isVMerge(N, UnitSize, 0, 0);
1734     else if (ShuffleKind == 2) // swapped
1735       return isVMerge(N, UnitSize, 0, 16);
1736     else
1737       return false;
1738   } else {
1739     if (ShuffleKind == 1) // unary
1740       return isVMerge(N, UnitSize, 8, 8);
1741     else if (ShuffleKind == 0) // normal
1742       return isVMerge(N, UnitSize, 8, 24);
1743     else
1744       return false;
1745   }
1746 }
1747 
1748 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1749 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1750 /// The ShuffleKind distinguishes between big-endian merges with two
1751 /// different inputs (0), either-endian merges with two identical inputs (1),
1752 /// and little-endian merges with two different inputs (2).  For the latter,
1753 /// the input operands are swapped (see PPCInstrAltivec.td).
1754 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1755                              unsigned ShuffleKind, SelectionDAG &DAG) {
1756   if (DAG.getDataLayout().isLittleEndian()) {
1757     if (ShuffleKind == 1) // unary
1758       return isVMerge(N, UnitSize, 8, 8);
1759     else if (ShuffleKind == 2) // swapped
1760       return isVMerge(N, UnitSize, 8, 24);
1761     else
1762       return false;
1763   } else {
1764     if (ShuffleKind == 1) // unary
1765       return isVMerge(N, UnitSize, 0, 0);
1766     else if (ShuffleKind == 0) // normal
1767       return isVMerge(N, UnitSize, 0, 16);
1768     else
1769       return false;
1770   }
1771 }
1772 
1773 /**
1774  * Common function used to match vmrgew and vmrgow shuffles
1775  *
1776  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
1778  * machine.
1779  *   - Little Endian:
1780  *     - Use offset of 0 to check for odd elements
1781  *     - Use offset of 4 to check for even elements
1782  *   - Big Endian:
1783  *     - Use offset of 0 to check for even elements
1784  *     - Use offset of 4 to check for odd elements
1785  * A detailed description of the vector element ordering for little endian and
1786  * big endian can be found at
1787  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1788  * Targeting your applications - what little endian and big endian IBM XL C/C++
1789  * compiler differences mean to you
1790  *
1791  * The mask to the shuffle vector instruction specifies the indices of the
1792  * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 elements of 8 bits
 * each. More info on the shuffle vector can be found in the
1796  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1797  * Language Reference.
1798  *
1799  * The RHSStartValue indicates whether the same input vectors are used (unary)
1800  * or two different input vectors are used, based on the following:
1801  *   - If the instruction uses the same vector for both inputs, the range of the
1802  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1803  *     be 0.
1804  *   - If the instruction has two different vectors then the range of the
1805  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1806  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1807  *     to 31 specify elements in the second vector).
1808  *
1809  * \param[in] N The shuffle vector SD Node to analyze
1810  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1811  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1812  * vector to the shuffle_vector instruction
1813  * \return true iff this shuffle vector represents an even or odd word merge
1814  */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  // Only v16i8 shuffles are matched here.
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  // Check both 4-byte words of each result half: bytes i*4+j of the low half
  // and i*4+j+8 of the high half must select the word at IndexOffset (plus 8
  // for the second word) from the appropriate input (RHSStartValue selects
  // between unary and binary forms; see the comment block above).
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
1829 
1830 /**
1831  * Determine if the specified shuffle mask is suitable for the vmrgew or
1832  * vmrgow instructions.
1833  *
1834  * \param[in] N The shuffle vector SD Node to analyze
1835  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1836  * \param[in] ShuffleKind Identify the type of merge:
1837  *   - 0 = big-endian merge with two different inputs;
1838  *   - 1 = either-endian merge with two identical inputs;
1839  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1840  *     little-endian merges).
1841  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow
 * instructions
1843  */
1844 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1845                               unsigned ShuffleKind, SelectionDAG &DAG) {
1846   if (DAG.getDataLayout().isLittleEndian()) {
1847     unsigned indexOffset = CheckEven ? 4 : 0;
1848     if (ShuffleKind == 1) // Unary
1849       return isVMerge(N, indexOffset, 0);
1850     else if (ShuffleKind == 2) // swapped
1851       return isVMerge(N, indexOffset, 16);
1852     else
1853       return false;
1854   }
1855   else {
1856     unsigned indexOffset = CheckEven ? 0 : 4;
1857     if (ShuffleKind == 1) // Unary
1858       return isVMerge(N, indexOffset, 0);
1859     else if (ShuffleKind == 0) // Normal
1860       return isVMerge(N, indexOffset, 16);
1861     else
1862       return false;
1863   }
1864   return false;
1865 }
1866 
1867 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1868 /// amount, otherwise return -1.
1869 /// The ShuffleKind distinguishes between big-endian operations with two
1870 /// different inputs (0), either-endian operations with two identical inputs
1871 /// (1), and little-endian operations with two different inputs (2).  For the
1872 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  // Only v16i8 shuffles are matched here.
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  // The implied shift must be non-negative once adjusted for the leading
  // undefs we skipped.
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    // Unary form: indices wrap around modulo 16 since both inputs are the
    // same vector.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  // On little-endian targets the shift direction is reversed.
  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
1913 
1914 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1915 /// specifies a splat of a single element that is suitable for input to
1916 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  // Reject masks selecting from the second input vector.
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every subsequent element group must repeat the first group byte-for-byte
  // (undef bytes at a group's start let the whole group pass).
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
1948 
1949 /// Check that the mask is shuffling N byte elements. Within each N byte
1950 /// element of the mask, the indices could be either in increasing or
1951 /// decreasing order as long as they are consecutive.
1952 /// \param[in] N the shuffle vector SD Node to analyze
1953 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1954 /// Word/DoubleWord/QuadWord).
1955 /// \param[in] StepLen the delta indices number among the N byte element, if
1956 /// the mask is in increasing/decreasing order then it is 1/-1.
1957 /// \return true iff the mask is shuffling N byte elements.
1958 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1959                                    int StepLen) {
1960   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1961          "Unexpected element width.");
1962   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1963 
1964   unsigned NumOfElem = 16 / Width;
1965   unsigned MaskVal[16]; //  Width is never greater than 16
1966   for (unsigned i = 0; i < NumOfElem; ++i) {
1967     MaskVal[0] = N->getMaskElt(i * Width);
1968     if ((StepLen == 1) && (MaskVal[0] % Width)) {
1969       return false;
1970     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
1971       return false;
1972     }
1973 
1974     for (unsigned int j = 1; j < Width; ++j) {
1975       MaskVal[j] = N->getMaskElt(i * Width + j);
1976       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
1977         return false;
1978       }
1979     }
1980   }
1981 
1982   return true;
1983 }
1984 
/// Match a shuffle that can be implemented with XXINSERTW (insert one word
/// of one vector into another, possibly after rotating the source).
/// On success sets ShiftElts (word rotation of the source), InsertAtByte
/// (byte offset of the inserted word) and Swap (whether the inputs must be
/// swapped), and returns true.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // The mask must move whole 4-byte words.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  // Rotation needed to bring the out-of-place word into insert position,
  // indexed by that word's value modulo 4.
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2059 
/// Check whether the mask of shuffle node \p N can be implemented as a single
/// XXSLDWI (shift left double by word).  On a match, \p ShiftElts receives
/// the shift amount in words and \p Swap whether the two shuffle inputs must
/// be exchanged.  \p IsLE selects little-endian element numbering.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  // These are the word indices (0-7 across both concatenated inputs)
  // selected into the four result words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // With a single input, the run of words must wrap within that vector.
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  // (Indices wrap modulo 8 because they span both input vectors.)
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
2121 
2122 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2123   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2124 
2125   if (!isNByteElemShuffleMask(N, Width, -1))
2126     return false;
2127 
2128   for (int i = 0; i < 16; i += Width)
2129     if (N->getMaskElt(i) != i + Width - 1)
2130       return false;
2131 
2132   return true;
2133 }
2134 
/// Return true if \p N's mask byte-reverses each halfword (XXBRH pattern).
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}
2138 
/// Return true if \p N's mask byte-reverses each word (XXBRW pattern).
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}
2142 
/// Return true if \p N's mask byte-reverses each doubleword (XXBRD pattern).
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}
2146 
/// Return true if \p N's mask byte-reverses the whole quadword (XXBRQ
/// pattern).
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
2150 
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // M0/M1 are the doubleword indices (0-3 across the two concatenated
  // inputs) selected into result elements 0 and 1.
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    // Single-input case: both selected doublewords must come from operand 0.
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Element 0 of the result comes from the first input: swap the
      // operands and remap the indices into the swapped concatenation.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    // The complemented low bits convert the LE mask numbering into the
    // instruction immediate (mirror of the BE computation below).
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Element 0 of the result comes from the second input: swap and remap.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2210 
2211 
2212 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2213 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2214 /// elements are counted from the left of the vector register).
2215 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2216                                          SelectionDAG &DAG) {
2217   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2218   assert(isSplatShuffleMask(SVOp, EltSize));
2219   if (DAG.getDataLayout().isLittleEndian())
2220     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2221   else
2222     return SVOp->getMaskElt(0) / EltSize;
2223 }
2224 
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    // ByteSize is in {1,2,4} and EltSize divides it, so Multiple is a power
    // of two (2 or 4); i & (Multiple-1) below is therefore i % Multiple.
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      // Positive range of the 5-bit signed vspltis* immediate.
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      // Negative range of the 5-bit signed vspltis* immediate.
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  // Extract the raw constant bits of the splatted value.
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
2328 
2329 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2330 /// amount, otherwise return -1.
2331 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2332   EVT VT = N->getValueType(0);
2333   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2334     return -1;
2335 
2336   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2337 
2338   // Find the first non-undef value in the shuffle mask.
2339   unsigned i;
2340   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2341     /*search*/;
2342 
2343   if (i == 4) return -1;  // all undef.
2344 
2345   // Otherwise, check to see if the rest of the elements are consecutively
2346   // numbered from this value.
2347   unsigned ShiftAmt = SVOp->getMaskElt(i);
2348   if (ShiftAmt < i) return -1;
2349   ShiftAmt -= i;
2350 
2351   // Check the rest of the elements to see if they are consecutive.
2352   for (++i; i != 4; ++i)
2353     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2354       return -1;
2355 
2356   return ShiftAmt;
2357 }
2358 
2359 //===----------------------------------------------------------------------===//
2360 //  Addressing Mode Selection
2361 //===----------------------------------------------------------------------===//
2362 
2363 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2364 /// or 64-bit immediate, and if the value can be accurately represented as a
2365 /// sign extension from a 16-bit value.  If so, this returns true and the
2366 /// immediate.
2367 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2368   if (!isa<ConstantSDNode>(N))
2369     return false;
2370 
2371   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2372   if (N->getValueType(0) == MVT::i32)
2373     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2374   else
2375     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2376 }
/// Convenience overload of isIntS16Immediate that unwraps an SDValue and
/// forwards its underlying node.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
2380 
2381 
2382 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2383 /// be represented as an indexed [r+r] operation.
2384 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2385                                                SDValue &Index,
2386                                                SelectionDAG &DAG) const {
2387   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2388       UI != E; ++UI) {
2389     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2390       if (Memop->getMemoryVT() == MVT::f64) {
2391           Base = N.getOperand(0);
2392           Index = N.getOperand(1);
2393           return true;
2394       }
2395     }
2396   }
2397   return false;
2398 }
2399 
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
        return true;
    // Prefer [r+i] whenever the offset fits in 16 bits and satisfies the
    // required encoding alignment; the RegImm matcher will pick it up.
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    // Only bother computing the RHS known bits if at least one LHS bit is
    // known to be zero.
    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
2453 
2454 // If we happen to be doing an i64 load or store into a stack slot that has
2455 // less than a 4-byte alignment, then the frame-index elimination may need to
2456 // use an indexed load or store instruction (because the offset may not be a
2457 // multiple of 4). The extra register needed to hold the offset comes from the
2458 // register scavenger, and it is possible that the scavenger will need to use
2459 // an emergency spill slot. As a result, we need to make sure that a spill slot
2460 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2461 // stack slot.
2462 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2463   // FIXME: This does not handle the LWA case.
2464   if (VT != MVT::i64)
2465     return;
2466 
2467   // NOTE: We'll exclude negative FIs here, which come from argument
2468   // lowering, because there are no known test cases triggering this problem
2469   // using packed structures (or similar). We can remove this exclusion if
2470   // we find such a test case. The reason why this is so test-case driven is
2471   // because this entire 'fixup' is only to prevent crashes (from the
2472   // register scavenger) on not-really-valid inputs. For example, if we have:
2473   //   %a = alloca i1
2474   //   %b = bitcast i1* %a to i64*
2475   //   store i64* a, i64 b
2476   // then the store should really be marked as 'align 1', but is not. If it
2477   // were marked as 'align 1' then the indexed form would have been
2478   // instruction-selected initially, and the problem this 'fixup' is preventing
2479   // won't happen regardless.
2480   if (FrameIdx < 0)
2481     return;
2482 
2483   MachineFunction &MF = DAG.getMachineFunction();
2484   MachineFrameInfo &MFI = MF.getFrameInfo();
2485 
2486   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2487     return;
2488 
2489   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2490   FuncInfo->setHasNonRISpills();
2491 }
2492 
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    // ADD with a constant that fits (and is suitably aligned): [r+i].
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // An i64 access to an under-aligned slot may need scavenger support.
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      // Every bit set in imm must be known zero on the LHS for the OR to
      // behave as an addition.
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      // ZERO/ZERO8 in the base position reads as the constant zero.
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // The high part compensates for the sign extension of the low 16 bits
      // performed by the displacement.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: the whole expression is the base and the displacement is zero.
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}
2599 
2600 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2601 /// represented as an indexed [r+r] operation.
2602 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2603                                                 SDValue &Index,
2604                                                 SelectionDAG &DAG) const {
2605   // Check to see if we can easily represent this as an [r+r] address.  This
2606   // will fail if it thinks that the address is more profitably represented as
2607   // reg+imm, e.g. where imm = 0.
2608   if (SelectAddressRegReg(N, Base, Index, DAG))
2609     return true;
2610 
2611   // If the address is the result of an add, we will utilize the fact that the
2612   // address calculation includes an implicit add.  However, we can reduce
2613   // register pressure if we do not materialize a constant just for use as the
2614   // index register.  We only get rid of the add if it is not an add of a
2615   // value and a 16-bit signed constant and both have a single use.
2616   int16_t imm = 0;
2617   if (N.getOpcode() == ISD::ADD &&
2618       (!isIntS16Immediate(N.getOperand(1), imm) ||
2619        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2620     Base = N.getOperand(0);
2621     Index = N.getOperand(1);
2622     return true;
2623   }
2624 
2625   // Otherwise, do it the hard way, using R0 as the base register.
2626   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2627                          N.getValueType());
2628   Index = N;
2629   return true;
2630 }
2631 
2632 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2633   Ty *PCRelCand = dyn_cast<Ty>(N);
2634   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2635 }
2636 
2637 /// Returns true if this address is a PC Relative address.
2638 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2639 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2640 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2641   // This is a materialize PC Relative node. Always select this as PC Relative.
2642   Base = N;
2643   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2644     return true;
2645   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2646       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2647       isValidPCRelNode<JumpTableSDNode>(N) ||
2648       isValidPCRelNode<BlockAddressSDNode>(N))
2649     return true;
2650   return false;
2651 }
2652 
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;

  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  // Only certain scalar widths have a direct-to-vector load, and the
  // narrower ones require newer vector subtarget features.
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    // 32-bit direct vector loads require Power8 vector support.
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    // 8/16-bit direct vector loads require Power9 vector support.
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  // The loaded value (result 0) must have exactly one use ...
  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  // ... and every user of the value result must be some form of
  // scalar_to_vector (uses of the chain result are ignored).
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
        UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
      return false;

  return true;
}
2696 
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  // Pre-indexing only applies to plain loads and stores.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    // Non-i64 accesses: any 16-bit displacement is acceptable.
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // The i64 forms additionally require the displacement itself to be a
    // multiple of 4.
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
2780 
2781 //===----------------------------------------------------------------------===//
2782 //  LowerOperation implementation
2783 //===----------------------------------------------------------------------===//
2784 
/// Set HiOpFlags and LoOpFlags to the target MO flags used for hi/lo label
/// references (PPCII::MO_HA / PPCII::MO_LO), adding the PIC flag when
/// compiling position-independent code.
/// Note: Subtarget and GV are currently unused by this function; they are
/// retained for interface stability with existing callers.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  if (IsPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }
}
2799 
2800 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2801                              SelectionDAG &DAG) {
2802   SDLoc DL(HiPart);
2803   EVT PtrVT = HiPart.getValueType();
2804   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2805 
2806   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2807   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2808 
2809   // With PIC, the first instruction is actually "GR+hi(&G)".
2810   if (isPIC)
2811     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2812                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2813 
2814   // Generate non-pic code that has direct accesses to the constant pool.
2815   // The address of the global is just (hi(&g)+lo(&g)).
2816   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2817 }
2818 
2819 static void setUsesTOCBasePtr(MachineFunction &MF) {
2820   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2821   FuncInfo->setUsesTOCBasePtr();
2822 }
2823 
2824 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2825   setUsesTOCBasePtr(DAG.getMachineFunction());
2826 }
2827 
2828 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2829                                        SDValue GA) const {
2830   const bool Is64Bit = Subtarget.isPPC64();
2831   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2832   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2833                         : Subtarget.isAIXABI()
2834                               ? DAG.getRegister(PPC::R2, VT)
2835                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2836   SDValue Ops[] = { GA, Reg };
2837   return DAG.getMemIntrinsicNode(
2838       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2839       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2840       MachineMemOperand::MOLoad);
2841 }
2842 
// Lower a constant-pool reference to a target-specific address computation:
// PC-relative materialization, a TOC load, or a hi/lo pair, depending on the
// ABI and relocation model.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      // With PC-relative addressing enabled, materialize the address directly
      // rather than loading it from the TOC.
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC accesses the constant pool through the GOT-style TOC
  // entry mechanism.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA =
        DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  // Otherwise compute the address as hi(&cp) + lo(&cp).
  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
2880 
2881 // For 64-bit PowerPC, prefer the more compact relative encodings.
2882 // This trades 32 bits per jump table entry for one or two instructions
2883 // on the jump site.
2884 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2885   if (isJumpTableRelative())
2886     return MachineJumpTableInfo::EK_LabelDifference32;
2887 
2888   return TargetLowering::getJumpTableEncoding();
2889 }
2890 
2891 bool PPCTargetLowering::isJumpTableRelative() const {
2892   if (UseAbsoluteJumpTables)
2893     return false;
2894   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2895     return true;
2896   return TargetLowering::isJumpTableRelative();
2897 }
2898 
2899 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2900                                                     SelectionDAG &DAG) const {
2901   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2902     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2903 
2904   switch (getTargetMachine().getCodeModel()) {
2905   case CodeModel::Small:
2906   case CodeModel::Medium:
2907     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
2908   default:
2909     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
2910                        getPointerTy(DAG.getDataLayout()));
2911   }
2912 }
2913 
2914 const MCExpr *
2915 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
2916                                                 unsigned JTI,
2917                                                 MCContext &Ctx) const {
2918   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
2919     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2920 
2921   switch (getTargetMachine().getCodeModel()) {
2922   case CodeModel::Small:
2923   case CodeModel::Medium:
2924     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2925   default:
2926     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2927   }
2928 }
2929 
// Lower a jump-table reference, mirroring the strategy of LowerConstantPool:
// PC-relative materialization, TOC load, or hi/lo pair.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC loads the jump-table address from the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  // Otherwise compute the address as hi(&jt) + lo(&jt).
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
2966 
// Lower a blockaddress reference (address of a basic block taken via
// &&label) using the same ABI-dependent strategies as the other label
// lowerings above.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
                                           PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  // Otherwise compute the address as hi(&ba) + lo(&ba).
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
3004 
// Lower a thread-local global address according to the TLS model the target
// machine selected for the global: LocalExec, InitialExec, GeneralDynamic, or
// LocalDynamic.  Each model has a PC-relative variant and a TOC/GOT-based
// variant.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  if (Subtarget.isUsingPCRelativeCalls() && !EnablePPCPCRelTLS)
    report_fatal_error("Thread local storage is not supported with pc-relative"
                       " addressing - please compile with -mno-pcrel");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: add a link-time-known offset to the thread pointer
    // (X13 on 64-bit, R2 on 32-bit).
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = DAG.getTargetGlobalAddress(
          GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    // Non-PC-relative: materialize the offset with a TPREL hi/lo pair
    // relative to the thread register.
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the thread-pointer offset from the GOT (or via a
    // PC-relative load) and add it to the thread pointer.
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0,
        IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      // Select the GOT pointer: X2 on 64-bit; on 32-bit it depends on the
      // relocation model and PIC level.
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else {
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    // General-dynamic: resolve the address through the __tls_get_addr
    // machinery (folded into the pseudo nodes below).
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                   GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    // Local-dynamic: resolve the module's TLS block once, then add the
    // DTPREL offset of this particular global.
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
3136 
// Lower a (non-TLS) global address.  Depending on the ABI this becomes a
// PC-relative materialization (possibly GOT-indirect), a TOC entry load, or a
// hi/lo address pair.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DAG.getDataLayout());
      if (isAccessedAsGotIndirect(Op)) {
        // Globals that must go through the GOT: materialize the GOT slot
        // address PC-relatively and load the final address from it.
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
                                                PPCII::MO_PCREL_FLAG |
                                                    PPCII::MO_GOT_FLAG);
        SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
        SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                                   MachinePointerInfo());
        return Load;
      } else {
        // Otherwise the address can be materialized directly.
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
                                                PPCII::MO_PCREL_FLAG);
        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
      }
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  // 32-bit SVR4 PIC loads the address from the GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, GA);
  }

  // Otherwise compute the address as hi(&g) + lo(&g).
  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}
3186 
// Custom-lower SETCC: handle v2i64 comparisons specially and rewrite integer
// equality comparisons into forms the DAG combiner can optimize further.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      // Expand all non-equality v2i64 comparisons.
      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  // Everything else goes through the default expansion.
  return SDValue();
}
3240 
// Lower va_arg for the 32-bit SVR4 ABI: read the gpr/fpr index bytes from the
// va_list, decide whether the argument lives in the register save area or the
// overflow (stack) area, bump the appropriate cursor, and load the value.
// See LowerVASTART for the va_list struct layout this walks.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // i64 values occupy an even/odd GPR pair, so round the index up to even.
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  // reg_save_area pointer sits at offset 8, overflow_arg_area at offset 4.
  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  // Finally load the argument from whichever area was selected.
  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
3339 
3340 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3341   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3342 
3343   // We have to copy the entire va_list struct:
3344   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3345   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3346                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3347                        false, true, false, MachinePointerInfo(),
3348                        MachinePointerInfo());
3349 }
3350 
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Trampolines are not supported on AIX at all.
  if (Subtarget.isAIXABI())
    report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");

  // No adjustment is required on PPC; forward the first operand unchanged.
  return Op.getOperand(0);
}
3358 
// Lower llvm.init.trampoline by emitting a call to the runtime helper
// __trampoline_setup(Trmp, TrampSize, FPtr, Nest), which writes the
// trampoline code into the buffer at Trmp.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Trampolines are not supported on AIX at all.
  if (Subtarget.isAIXABI())
    report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  // Only the chain result matters; the helper returns void.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
3397 
// Lower va_start.  On 64-bit and AIX targets the va_list is a simple pointer
// to the varargs frame slot; the 32-bit SVR4 ABI instead initializes the
// four-field va_list struct documented below.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  // Field offsets within the struct, expressed as pointer-sized constants.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
3481 
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.  Lists the thirteen argument FPRs f1 through f13.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3487 
3488 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3489 /// the stack.
3490 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3491                                        unsigned PtrByteSize) {
3492   unsigned ArgSize = ArgVT.getStoreSize();
3493   if (Flags.isByVal())
3494     ArgSize = Flags.getByValSize();
3495 
3496   // Round up to multiples of the pointer size, except for array members,
3497   // which are always packed.
3498   if (!Flags.isInConsecutiveRegs())
3499     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3500 
3501   return ArgSize;
3502 }
3503 
3504 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3505 /// on the stack.
3506 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3507                                          ISD::ArgFlagsTy Flags,
3508                                          unsigned PtrByteSize) {
3509   Align Alignment(PtrByteSize);
3510 
3511   // Altivec parameters are padded to a 16 byte boundary.
3512   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3513       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3514       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3515       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3516     Alignment = Align(16);
3517 
3518   // ByVal parameters are aligned as requested.
3519   if (Flags.isByVal()) {
3520     auto BVAlign = Flags.getNonZeroByValAlign();
3521     if (BVAlign > PtrByteSize) {
3522       if (BVAlign.value() % PtrByteSize != 0)
3523         llvm_unreachable(
3524             "ByVal alignment is not a multiple of the pointer size");
3525 
3526       Alignment = BVAlign;
3527     }
3528   }
3529 
3530   // Array members are always packed to their original alignment.
3531   if (Flags.isInConsecutiveRegs()) {
3532     // If the array member was split into multiple registers, the first
3533     // needs to be aligned to the size of the full type.  (Except for
3534     // ppcf128, which is only aligned as its f64 components.)
3535     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3536       Alignment = Align(OrigVT.getStoreSize());
3537     else
3538       Alignment = Align(ArgVT.getStoreSize());
3539   }
3540 
3541   return Alignment;
3542 }
3543 
3544 /// CalculateStackSlotUsed - Return whether this argument will use its
3545 /// stack slot (instead of being passed in registers).  ArgOffset,
3546 /// AvailableFPRs, and AvailableVRs must hold the current argument
3547 /// position, and will be updated to account for this argument.
3548 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3549                                    unsigned PtrByteSize, unsigned LinkageSize,
3550                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3551                                    unsigned &AvailableFPRs,
3552                                    unsigned &AvailableVRs) {
3553   bool UseMemory = false;
3554 
3555   // Respect alignment of argument on the stack.
3556   Align Alignment =
3557       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3558   ArgOffset = alignTo(ArgOffset, Alignment);
3559   // If there's no space left in the argument save area, we must
3560   // use memory (this check also catches zero-sized arguments).
3561   if (ArgOffset >= LinkageSize + ParamAreaSize)
3562     UseMemory = true;
3563 
3564   // Allocate argument on the stack.
3565   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3566   if (Flags.isInConsecutiveRegsLast())
3567     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3568   // If we overran the argument save area, we must use memory
3569   // (this check catches arguments passed partially in memory)
3570   if (ArgOffset > LinkageSize + ParamAreaSize)
3571     UseMemory = true;
3572 
3573   // However, if the argument is actually passed in an FPR or a VR,
3574   // we don't use memory after all.
3575   if (!Flags.isByVal()) {
3576     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3577       if (AvailableFPRs > 0) {
3578         --AvailableFPRs;
3579         return false;
3580       }
3581     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3582         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3583         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3584         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3585       if (AvailableVRs > 0) {
3586         --AvailableVRs;
3587         return false;
3588       }
3589   }
3590 
3591   return UseMemory;
3592 }
3593 
3594 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3595 /// ensure minimum alignment required for target.
3596 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3597                                      unsigned NumBytes) {
3598   return alignTo(NumBytes, Lowering->getStackAlign());
3599 }
3600 
// Dispatch the lowering of incoming formal arguments to the implementation
// matching the subtarget's ABI: AIX, 64-bit ELF (SVR4), 32-bit ELF (SVR4),
// with Darwin as the fallback.  The checks are mutually exclusive by ABI,
// so the order only determines which predicate is tested first.
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  if (Subtarget.is32BitELFABI())
    return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);

  // No other ABI matched: use the Darwin lowering.
  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
3618 
/// Lower incoming formal arguments for the 32-bit ELF/SVR4 ABI.  Arguments
/// assigned to registers by CC_PPC32_SVR4 are copied into virtual registers;
/// arguments assigned to memory are loaded from fixed stack objects.  ByVal
/// aggregates are analyzed separately with CC_PPC32_SVR4_ByVal.  For varargs
/// functions, the unnamed GPR/FPR argument registers are spilled to a frame
/// object so that va_arg can read them.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  // With soft float, let PPCCCState pre-analyze the formal arguments before
  // running the calling convention (see also clearWasPPCF128 below).
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      // Pick the register class matching the value type (and the available
      // subtarget features).
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // SPE doubles arrive split across a pair of GPRs: consume this
        // location and the next, and rebuild the f64 from the two i32
        // halves (half order depends on endianness).
        assert(i + 1 < e && "No second half of double precision argument");
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        // i1 values are passed widened to i32 and truncated back here.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // Stores spilling the vararg registers; token-factored into the chain at
  // the end.
  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    // No FPRs to save when floating point lives in GPRs (soft float / SPE).
    if (useSoftFloat() || hasSPE())
       NumFPArgRegs = 0;

    // Record how many named arguments consumed GPRs/FPRs; va_start stores
    // these counts into the va_list.
    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    // Fixed object marking where overflow (stack) arguments begin; read back
    // by the va_start lowering.
    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    // Register save area for the vararg GPRs and FPRs.
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
3868 
3869 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3870 // value to MVT::i64 and then truncate to the correct register size.
3871 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3872                                              EVT ObjectVT, SelectionDAG &DAG,
3873                                              SDValue ArgVal,
3874                                              const SDLoc &dl) const {
3875   if (Flags.isSExt())
3876     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3877                          DAG.getValueType(ObjectVT));
3878   else if (Flags.isZExt())
3879     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3880                          DAG.getValueType(ObjectVT));
3881 
3882   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3883 }
3884 
3885 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3886     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3887     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3888     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3889   // TODO: add description of PPC stack frame format, or at least some docs.
3890   //
3891   bool isELFv2ABI = Subtarget.isELFv2ABI();
3892   bool isLittleEndian = Subtarget.isLittleEndian();
3893   MachineFunction &MF = DAG.getMachineFunction();
3894   MachineFrameInfo &MFI = MF.getFrameInfo();
3895   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3896 
3897   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3898          "fastcc not supported on varargs functions");
3899 
3900   EVT PtrVT = getPointerTy(MF.getDataLayout());
3901   // Potential tail calls could cause overwriting of argument stack slots.
3902   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3903                        (CallConv == CallingConv::Fast));
3904   unsigned PtrByteSize = 8;
3905   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3906 
3907   static const MCPhysReg GPR[] = {
3908     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3909     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3910   };
3911   static const MCPhysReg VR[] = {
3912     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3913     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3914   };
3915 
3916   const unsigned Num_GPR_Regs = array_lengthof(GPR);
3917   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
3918   const unsigned Num_VR_Regs  = array_lengthof(VR);
3919 
3920   // Do a first pass over the arguments to determine whether the ABI
3921   // guarantees that our caller has allocated the parameter save area
3922   // on its stack frame.  In the ELFv1 ABI, this is always the case;
3923   // in the ELFv2 ABI, it is true if this is a vararg function or if
3924   // any parameter is located in a stack slot.
3925 
3926   bool HasParameterArea = !isELFv2ABI || isVarArg;
3927   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
3928   unsigned NumBytes = LinkageSize;
3929   unsigned AvailableFPRs = Num_FPR_Regs;
3930   unsigned AvailableVRs = Num_VR_Regs;
3931   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3932     if (Ins[i].Flags.isNest())
3933       continue;
3934 
3935     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
3936                                PtrByteSize, LinkageSize, ParamAreaSize,
3937                                NumBytes, AvailableFPRs, AvailableVRs))
3938       HasParameterArea = true;
3939   }
3940 
3941   // Add DAG nodes to load the arguments or copy them out of registers.  On
3942   // entry to a function on PPC, the arguments start after the linkage area,
3943   // although the first ones are often in registers.
3944 
3945   unsigned ArgOffset = LinkageSize;
3946   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3947   SmallVector<SDValue, 8> MemOps;
3948   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
3949   unsigned CurArgIdx = 0;
3950   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3951     SDValue ArgVal;
3952     bool needsLoad = false;
3953     EVT ObjectVT = Ins[ArgNo].VT;
3954     EVT OrigVT = Ins[ArgNo].ArgVT;
3955     unsigned ObjSize = ObjectVT.getStoreSize();
3956     unsigned ArgSize = ObjSize;
3957     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3958     if (Ins[ArgNo].isOrigArg()) {
3959       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3960       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3961     }
3962     // We re-align the argument offset for each argument, except when using the
3963     // fast calling convention, when we need to make sure we do that only when
3964     // we'll actually use a stack slot.
3965     unsigned CurArgOffset;
3966     Align Alignment;
3967     auto ComputeArgOffset = [&]() {
3968       /* Respect alignment of argument on the stack.  */
3969       Alignment =
3970           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3971       ArgOffset = alignTo(ArgOffset, Alignment);
3972       CurArgOffset = ArgOffset;
3973     };
3974 
3975     if (CallConv != CallingConv::Fast) {
3976       ComputeArgOffset();
3977 
3978       /* Compute GPR index associated with argument offset.  */
3979       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3980       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3981     }
3982 
3983     // FIXME the codegen can be much improved in some cases.
3984     // We do not have to keep everything in memory.
3985     if (Flags.isByVal()) {
3986       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3987 
3988       if (CallConv == CallingConv::Fast)
3989         ComputeArgOffset();
3990 
3991       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3992       ObjSize = Flags.getByValSize();
3993       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3994       // Empty aggregate parameters do not take up registers.  Examples:
3995       //   struct { } a;
3996       //   union  { } b;
3997       //   int c[0];
3998       // etc.  However, we have to provide a place-holder in InVals, so
3999       // pretend we have an 8-byte item at the current address for that
4000       // purpose.
4001       if (!ObjSize) {
4002         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4003         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4004         InVals.push_back(FIN);
4005         continue;
4006       }
4007 
4008       // Create a stack object covering all stack doublewords occupied
4009       // by the argument.  If the argument is (fully or partially) on
4010       // the stack, or if the argument is fully in registers but the
4011       // caller has allocated the parameter save anyway, we can refer
4012       // directly to the caller's stack frame.  Otherwise, create a
4013       // local copy in our own frame.
4014       int FI;
4015       if (HasParameterArea ||
4016           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4017         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4018       else
4019         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4020       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4021 
4022       // Handle aggregates smaller than 8 bytes.
4023       if (ObjSize < PtrByteSize) {
4024         // The value of the object is its address, which differs from the
4025         // address of the enclosing doubleword on big-endian systems.
4026         SDValue Arg = FIN;
4027         if (!isLittleEndian) {
4028           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4029           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4030         }
4031         InVals.push_back(Arg);
4032 
4033         if (GPR_idx != Num_GPR_Regs) {
4034           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4035           FuncInfo->addLiveInAttr(VReg, Flags);
4036           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4037           SDValue Store;
4038 
4039           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4040             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4041                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4042             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4043                                       MachinePointerInfo(&*FuncArg), ObjType);
4044           } else {
4045             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4046             // store the whole register as-is to the parameter save area
4047             // slot.
4048             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4049                                  MachinePointerInfo(&*FuncArg));
4050           }
4051 
4052           MemOps.push_back(Store);
4053         }
4054         // Whether we copied from a register or not, advance the offset
4055         // into the parameter save area by a full doubleword.
4056         ArgOffset += PtrByteSize;
4057         continue;
4058       }
4059 
4060       // The value of the object is its address, which is the address of
4061       // its first stack doubleword.
4062       InVals.push_back(FIN);
4063 
4064       // Store whatever pieces of the object are in registers to memory.
4065       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4066         if (GPR_idx == Num_GPR_Regs)
4067           break;
4068 
4069         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4070         FuncInfo->addLiveInAttr(VReg, Flags);
4071         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4072         SDValue Addr = FIN;
4073         if (j) {
4074           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4075           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4076         }
4077         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4078                                      MachinePointerInfo(&*FuncArg, j));
4079         MemOps.push_back(Store);
4080         ++GPR_idx;
4081       }
4082       ArgOffset += ArgSize;
4083       continue;
4084     }
4085 
4086     switch (ObjectVT.getSimpleVT().SimpleTy) {
4087     default: llvm_unreachable("Unhandled argument type!");
4088     case MVT::i1:
4089     case MVT::i32:
4090     case MVT::i64:
4091       if (Flags.isNest()) {
4092         // The 'nest' parameter, if any, is passed in R11.
4093         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4094         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4095 
4096         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4097           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4098 
4099         break;
4100       }
4101 
4102       // These can be scalar arguments or elements of an integer array type
4103       // passed directly.  Clang may use those instead of "byval" aggregate
4104       // types to avoid forcing arguments to memory unnecessarily.
4105       if (GPR_idx != Num_GPR_Regs) {
4106         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4107         FuncInfo->addLiveInAttr(VReg, Flags);
4108         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4109 
4110         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4111           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4112           // value to MVT::i64 and then truncate to the correct register size.
4113           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4114       } else {
4115         if (CallConv == CallingConv::Fast)
4116           ComputeArgOffset();
4117 
4118         needsLoad = true;
4119         ArgSize = PtrByteSize;
4120       }
4121       if (CallConv != CallingConv::Fast || needsLoad)
4122         ArgOffset += 8;
4123       break;
4124 
4125     case MVT::f32:
4126     case MVT::f64:
4127       // These can be scalar arguments or elements of a float array type
4128       // passed directly.  The latter are used to implement ELFv2 homogenous
4129       // float aggregates.
4130       if (FPR_idx != Num_FPR_Regs) {
4131         unsigned VReg;
4132 
4133         if (ObjectVT == MVT::f32)
4134           VReg = MF.addLiveIn(FPR[FPR_idx],
4135                               Subtarget.hasP8Vector()
4136                                   ? &PPC::VSSRCRegClass
4137                                   : &PPC::F4RCRegClass);
4138         else
4139           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4140                                                 ? &PPC::VSFRCRegClass
4141                                                 : &PPC::F8RCRegClass);
4142 
4143         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4144         ++FPR_idx;
4145       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4146         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4147         // once we support fp <-> gpr moves.
4148 
4149         // This can only ever happen in the presence of f32 array types,
4150         // since otherwise we never run out of FPRs before running out
4151         // of GPRs.
4152         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4153         FuncInfo->addLiveInAttr(VReg, Flags);
4154         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4155 
4156         if (ObjectVT == MVT::f32) {
4157           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4158             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4159                                  DAG.getConstant(32, dl, MVT::i32));
4160           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4161         }
4162 
4163         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4164       } else {
4165         if (CallConv == CallingConv::Fast)
4166           ComputeArgOffset();
4167 
4168         needsLoad = true;
4169       }
4170 
4171       // When passing an array of floats, the array occupies consecutive
4172       // space in the argument area; only round up to the next doubleword
4173       // at the end of the array.  Otherwise, each float takes 8 bytes.
4174       if (CallConv != CallingConv::Fast || needsLoad) {
4175         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4176         ArgOffset += ArgSize;
4177         if (Flags.isInConsecutiveRegsLast())
4178           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4179       }
4180       break;
4181     case MVT::v4f32:
4182     case MVT::v4i32:
4183     case MVT::v8i16:
4184     case MVT::v16i8:
4185     case MVT::v2f64:
4186     case MVT::v2i64:
4187     case MVT::v1i128:
4188     case MVT::f128:
4189       // These can be scalar arguments or elements of a vector array type
4190       // passed directly.  The latter are used to implement ELFv2 homogenous
4191       // vector aggregates.
4192       if (VR_idx != Num_VR_Regs) {
4193         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4194         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4195         ++VR_idx;
4196       } else {
4197         if (CallConv == CallingConv::Fast)
4198           ComputeArgOffset();
4199         needsLoad = true;
4200       }
4201       if (CallConv != CallingConv::Fast || needsLoad)
4202         ArgOffset += 16;
4203       break;
4204     }
4205 
4206     // We need to load the argument to a virtual register if we determined
4207     // above that we ran out of physical registers of the appropriate type.
4208     if (needsLoad) {
4209       if (ObjSize < ArgSize && !isLittleEndian)
4210         CurArgOffset += ArgSize - ObjSize;
4211       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4212       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4213       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4214     }
4215 
4216     InVals.push_back(ArgVal);
4217   }
4218 
4219   // Area that is at least reserved in the caller of this function.
4220   unsigned MinReservedArea;
4221   if (HasParameterArea)
4222     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4223   else
4224     MinReservedArea = LinkageSize;
4225 
4226   // Set the size that is at least reserved in caller of this function.  Tail
4227   // call optimized functions' reserved stack space needs to be aligned so that
4228   // taking the difference between two stack areas will result in an aligned
4229   // stack.
4230   MinReservedArea =
4231       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4232   FuncInfo->setMinReservedArea(MinReservedArea);
4233 
4234   // If the function takes variable number of arguments, make a frame index for
4235   // the start of the first vararg value... for expansion of llvm.va_start.
4236   // On ELFv2ABI spec, it writes:
4237   // C programs that are intended to be *portable* across different compilers
4238   // and architectures must use the header file <stdarg.h> to deal with variable
4239   // argument lists.
4240   if (isVarArg && MFI.hasVAStart()) {
4241     int Depth = ArgOffset;
4242 
4243     FuncInfo->setVarArgsFrameIndex(
4244       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4245     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4246 
4247     // If this function is vararg, store any remaining integer argument regs
4248     // to their spots on the stack so that they may be loaded by dereferencing
4249     // the result of va_next.
4250     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4251          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4252       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4253       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4254       SDValue Store =
4255           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4256       MemOps.push_back(Store);
4257       // Increment the address by four for the next argument to store
4258       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4259       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4260     }
4261   }
4262 
4263   if (!MemOps.empty())
4264     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4265 
4266   return Chain;
4267 }
4268 
/// LowerFormalArguments_Darwin - Lower incoming formal arguments for the
/// Darwin PPC ABI.  Each incoming argument is either copied out of its
/// physical register (GPR/FPR/VR) or loaded from a fixed stack slot, and one
/// SDValue per argument is appended to \p InVals.  Also records the minimum
/// parameter-save area the caller must reserve (via
/// PPCFunctionInfo::setMinReservedArea) and, for varargs functions, creates
/// the frame index used to expand llvm.va_start and spills the remaining
/// integer argument registers to the stack.  Returns the (possibly updated)
/// Chain.
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  // Running offset into the parameter save area; arguments start right after
  // the linkage area.
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  // Next free register of each class; advanced as arguments are assigned.
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  // Count of vector args deferred to the end of the parameter area (only
  // happens for 32-bit non-varargs functions, see below).
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  // Main lowering loop: one iteration per incoming ISD::InputArg.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg in sync with the original IR argument this InputArg
      // came from (one IR argument may expand to several InputArgs).
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        // Small byvals arrive in (part of) a GPR; spill the live-in value to
        // the frame slot with a truncating store of the object's true width.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the object already lives in memory.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Align to the 16-byte vector slot, skipping GPRs that cover the
          // padding, then consume the slot (and up to 4 covering GPRs).
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4628 
4629 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4630 /// adjusted to accommodate the arguments for the tailcall.
4631 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4632                                    unsigned ParamSize) {
4633 
4634   if (!isTailCall) return 0;
4635 
4636   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4637   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4638   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4639   // Remember only if the new adjustment is bigger.
4640   if (SPDiff < FI->getTailCallSPDelta())
4641     FI->setTailCallSPDelta(SPDiff);
4642 
4643   return SPDiff;
4644 }
4645 
4646 static bool isFunctionGlobalAddress(SDValue Callee);
4647 
4648 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4649                               const TargetMachine &TM) {
4650   // It does not make sense to call callsShareTOCBase() with a caller that
4651   // is PC Relative since PC Relative callers do not have a TOC.
4652 #ifndef NDEBUG
4653   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4654   assert(!STICaller->isUsingPCRelativeCalls() &&
4655          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4656 #endif
4657 
4658   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4659   // don't have enough information to determine if the caller and callee share
4660   // the same  TOC base, so we have to pessimistically assume they don't for
4661   // correctness.
4662   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4663   if (!G)
4664     return false;
4665 
4666   const GlobalValue *GV = G->getGlobal();
4667 
4668   // If the callee is preemptable, then the static linker will use a plt-stub
4669   // which saves the toc to the stack, and needs a nop after the call
4670   // instruction to convert to a toc-restore.
4671   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4672     return false;
4673 
4674   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4675   // We may need a TOC restore in the situation where the caller requires a
4676   // valid TOC but the callee is PC Relative and does not.
4677   const Function *F = dyn_cast<Function>(GV);
4678   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4679 
4680   // If we have an Alias we can try to get the function from there.
4681   if (Alias) {
4682     const GlobalObject *GlobalObj = Alias->getBaseObject();
4683     F = dyn_cast<Function>(GlobalObj);
4684   }
4685 
4686   // If we still have no valid function pointer we do not have enough
4687   // information to determine if the callee uses PC Relative calls so we must
4688   // assume that it does.
4689   if (!F)
4690     return false;
4691 
4692   // If the callee uses PC Relative we cannot guarantee that the callee won't
4693   // clobber the TOC of the caller and so we must assume that the two
4694   // functions do not share a TOC base.
4695   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4696   if (STICallee->isUsingPCRelativeCalls())
4697     return false;
4698 
4699   // The medium and large code models are expected to provide a sufficiently
4700   // large TOC to provide all data addressing needs of a module with a
4701   // single TOC.
4702   if (CodeModel::Medium == TM.getCodeModel() ||
4703       CodeModel::Large == TM.getCodeModel())
4704     return true;
4705 
4706   // Otherwise we need to ensure callee and caller are in the same section,
4707   // since the linker may allocate multiple TOCs, and we don't know which
4708   // sections will belong to the same TOC base.
4709   if (!GV->isStrongDefinitionForLinker())
4710     return false;
4711 
4712   // Any explicitly-specified sections and section prefixes must also match.
4713   // Also, if we're using -ffunction-sections, then each function is always in
4714   // a different section (the same is true for COMDAT functions).
4715   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4716       GV->getSection() != Caller->getSection())
4717     return false;
4718   if (const auto *F = dyn_cast<Function>(GV)) {
4719     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4720       return false;
4721   }
4722 
4723   return true;
4724 }
4725 
4726 static bool
4727 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4728                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4729   assert(Subtarget.is64BitELFABI());
4730 
4731   const unsigned PtrByteSize = 8;
4732   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4733 
4734   static const MCPhysReg GPR[] = {
4735     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4736     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4737   };
4738   static const MCPhysReg VR[] = {
4739     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4740     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4741   };
4742 
4743   const unsigned NumGPRs = array_lengthof(GPR);
4744   const unsigned NumFPRs = 13;
4745   const unsigned NumVRs = array_lengthof(VR);
4746   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4747 
4748   unsigned NumBytes = LinkageSize;
4749   unsigned AvailableFPRs = NumFPRs;
4750   unsigned AvailableVRs = NumVRs;
4751 
4752   for (const ISD::OutputArg& Param : Outs) {
4753     if (Param.Flags.isNest()) continue;
4754 
4755     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4756                                LinkageSize, ParamAreaSize, NumBytes,
4757                                AvailableFPRs, AvailableVRs))
4758       return true;
4759   }
4760   return false;
4761 }
4762 
4763 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4764   if (CB.arg_size() != CallerFn->arg_size())
4765     return false;
4766 
4767   auto CalleeArgIter = CB.arg_begin();
4768   auto CalleeArgEnd = CB.arg_end();
4769   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4770 
4771   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4772     const Value* CalleeArg = *CalleeArgIter;
4773     const Value* CallerArg = &(*CallerArgIter);
4774     if (CalleeArg == CallerArg)
4775       continue;
4776 
4777     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4778     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4779     //      }
4780     // 1st argument of callee is undef and has the same type as caller.
4781     if (CalleeArg->getType() == CallerArg->getType() &&
4782         isa<UndefValue>(CalleeArg))
4783       continue;
4784 
4785     return false;
4786   }
4787 
4788   return true;
4789 }
4790 
4791 // Returns true if TCO is possible between the callers and callees
4792 // calling conventions.
4793 static bool
4794 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4795                                     CallingConv::ID CalleeCC) {
4796   // Tail calls are possible with fastcc and ccc.
4797   auto isTailCallableCC  = [] (CallingConv::ID CC){
4798       return  CC == CallingConv::C || CC == CallingConv::Fast;
4799   };
4800   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4801     return false;
4802 
4803   // We can safely tail call both fastcc and ccc callees from a c calling
4804   // convention caller. If the caller is fastcc, we may have less stack space
4805   // than a non-fastcc caller with the same signature so disable tail-calls in
4806   // that case.
4807   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4808 }
4809 
4810 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4811     SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4812     const SmallVectorImpl<ISD::OutputArg> &Outs,
4813     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4814   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4815 
4816   if (DisableSCO && !TailCallOpt) return false;
4817 
4818   // Variadic argument functions are not supported.
4819   if (isVarArg) return false;
4820 
4821   auto &Caller = DAG.getMachineFunction().getFunction();
4822   // Check that the calling conventions are compatible for tco.
4823   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4824     return false;
4825 
4826   // Caller contains any byval parameter is not supported.
4827   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4828     return false;
4829 
4830   // Callee contains any byval parameter is not supported, too.
4831   // Note: This is a quick work around, because in some cases, e.g.
4832   // caller's stack size > callee's stack size, we are still able to apply
4833   // sibling call optimization. For example, gcc is able to do SCO for caller1
4834   // in the following example, but not for caller2.
4835   //   struct test {
4836   //     long int a;
4837   //     char ary[56];
4838   //   } gTest;
4839   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4840   //     b->a = v.a;
4841   //     return 0;
4842   //   }
4843   //   void caller1(struct test a, struct test c, struct test *b) {
4844   //     callee(gTest, b); }
4845   //   void caller2(struct test *b) { callee(gTest, b); }
4846   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4847     return false;
4848 
4849   // If callee and caller use different calling conventions, we cannot pass
4850   // parameters on stack since offsets for the parameter area may be different.
4851   if (Caller.getCallingConv() != CalleeCC &&
4852       needStackSlotPassParameters(Subtarget, Outs))
4853     return false;
4854 
4855   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4856   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4857   // callee potentially have different TOC bases then we cannot tail call since
4858   // we need to restore the TOC pointer after the call.
4859   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4860   // We cannot guarantee this for indirect calls or calls to external functions.
4861   // When PC-Relative addressing is used, the concept of the TOC is no longer
4862   // applicable so this check is not required.
4863   // Check first for indirect calls.
4864   if (!Subtarget.isUsingPCRelativeCalls() &&
4865       !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4866     return false;
4867 
4868   // Check if we share the TOC base.
4869   if (!Subtarget.isUsingPCRelativeCalls() &&
4870       !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4871     return false;
4872 
4873   // TCO allows altering callee ABI, so we don't have to check further.
4874   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4875     return true;
4876 
4877   if (DisableSCO) return false;
4878 
4879   // If callee use the same argument list that caller is using, then we can
4880   // apply SCO on this case. If it is not, then we need to check if callee needs
4881   // stack for passing arguments.
4882   // PC Relative tail calls may not have a CallBase.
4883   // If there is no CallBase we cannot verify if we have the same argument
4884   // list so assume that we don't have the same argument list.
4885   if (CB && !hasSameArgumentList(&Caller, *CB) &&
4886       needStackSlotPassParameters(Subtarget, Outs))
4887     return false;
4888   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4889     return false;
4890 
4891   return true;
4892 }
4893 
4894 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4895 /// for tail call optimization. Targets which want to do tail call
4896 /// optimization should implement this function.
4897 bool
4898 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4899                                                      CallingConv::ID CalleeCC,
4900                                                      bool isVarArg,
4901                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4902                                                      SelectionDAG& DAG) const {
4903   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4904     return false;
4905 
4906   // Variable argument functions are not supported.
4907   if (isVarArg)
4908     return false;
4909 
4910   MachineFunction &MF = DAG.getMachineFunction();
4911   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4912   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4913     // Functions containing by val parameters are not supported.
4914     for (unsigned i = 0; i != Ins.size(); i++) {
4915        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4916        if (Flags.isByVal()) return false;
4917     }
4918 
4919     // Non-PIC/GOT tail calls are supported.
4920     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4921       return true;
4922 
4923     // At the moment we can only do local tail calls (in same module, hidden
4924     // or protected) if we are generating PIC.
4925     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4926       return G->getGlobal()->hasHiddenVisibility()
4927           || G->getGlobal()->hasProtectedVisibility();
4928   }
4929 
4930   return false;
4931 }
4932 
4933 /// isCallCompatibleAddress - Return the immediate to use if the specified
4934 /// 32-bit value is representable in the immediate field of a BxA instruction.
4935 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4936   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4937   if (!C) return nullptr;
4938 
4939   int Addr = C->getZExtValue();
4940   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4941       SignExtend32<26>(Addr) != Addr)
4942     return nullptr;  // Top 6 bits have to be sext of immediate.
4943 
4944   return DAG
4945       .getConstant(
4946           (int)C->getZExtValue() >> 2, SDLoc(Op),
4947           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4948       .getNode();
4949 }
4950 
namespace {

// Records an outgoing argument that must be stored into the caller's stack
// frame for a tail call, together with the fixed stack slot it belongs in.
// Instances are collected by CalculateTailCallArgDest and later consumed by
// StoreTailCallArgumentsToStackSlot.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to be stored.
  SDValue FrameIdxOp; // Frame-index node used as the store address.
  int FrameIdx = 0;   // Raw frame index of the fixed stack object.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4962 
4963 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4964 static void StoreTailCallArgumentsToStackSlot(
4965     SelectionDAG &DAG, SDValue Chain,
4966     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4967     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4968   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4969     SDValue Arg = TailCallArgs[i].Arg;
4970     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4971     int FI = TailCallArgs[i].FrameIdx;
4972     // Store relative to framepointer.
4973     MemOpChains.push_back(DAG.getStore(
4974         Chain, dl, Arg, FIN,
4975         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4976   }
4977 }
4978 
4979 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4980 /// the appropriate stack slot for the tail call optimized function call.
4981 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4982                                              SDValue OldRetAddr, SDValue OldFP,
4983                                              int SPDiff, const SDLoc &dl) {
4984   if (SPDiff) {
4985     // Calculate the new stack slot for the return address.
4986     MachineFunction &MF = DAG.getMachineFunction();
4987     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4988     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4989     bool isPPC64 = Subtarget.isPPC64();
4990     int SlotSize = isPPC64 ? 8 : 4;
4991     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4992     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4993                                                          NewRetAddrLoc, true);
4994     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4995     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4996     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4997                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4998   }
4999   return Chain;
5000 }
5001 
5002 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5003 /// the position of the argument.
5004 static void
5005 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5006                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5007                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5008   int Offset = ArgOffset + SPDiff;
5009   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5010   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5011   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5012   SDValue FIN = DAG.getFrameIndex(FI, VT);
5013   TailCallArgumentInfo Info;
5014   Info.Arg = Arg;
5015   Info.FrameIdxOp = FIN;
5016   Info.FrameIdx = FI;
5017   TailCallArguments.push_back(Info);
5018 }
5019 
/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.
/// NOTE(review): despite the name and comment above, only the return address
/// (LR) is loaded here; FPOpOut is never written by this implementation —
/// confirm callers do not rely on it being set.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
    // The load produces (value, chain); thread its chain result (value #1)
    // onward so the load is ordered before later users of Chain.
    Chain = SDValue(LROpOut.getNode(), 1);
  }
  return Chain;
}
5035 
5036 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5037 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5038 /// specified by the specific parameter attribute. The copy will be passed as
5039 /// a byval function parameter.
5040 /// Sometimes what we are copying is the end of a larger object, the part that
5041 /// does not fit in registers.
5042 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5043                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5044                                          SelectionDAG &DAG, const SDLoc &dl) {
5045   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5046   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5047                        Flags.getNonZeroByValAlign(), false, false, false,
5048                        MachinePointerInfo(), MachinePointerInfo());
5049 }
5050 
5051 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5052 /// tail calls.
5053 static void LowerMemOpCallTo(
5054     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5055     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5056     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5057     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5058   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5059   if (!isTailCall) {
5060     if (isVector) {
5061       SDValue StackPtr;
5062       if (isPPC64)
5063         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5064       else
5065         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5066       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5067                            DAG.getConstant(ArgOffset, dl, PtrVT));
5068     }
5069     MemOpChains.push_back(
5070         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5071     // Calculate and remember argument location.
5072   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5073                                   TailCallArguments);
5074 }
5075 
/// PrepareTailCall - Emit the stores of the recorded tail-call arguments into
/// their final stack slots, store the return address into its adjusted slot,
/// and close the call sequence just before the tail-call node is emitted.
/// On return, Chain is the updated chain and InFlag is the glue produced by
/// the CALLSEQ_END node.
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  // Merge the argument stores into a single token factor so they all order
  // before the CALLSEQ_END below.
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
5099 
5100 // Is this global address that of a function that can be called by name? (as
5101 // opposed to something that must hold a descriptor for an indirect call).
5102 static bool isFunctionGlobalAddress(SDValue Callee) {
5103   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5104     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5105         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5106       return false;
5107 
5108     return G->getGlobal()->getValueType()->isFunctionTy();
5109   }
5110 
5111   return false;
5112 }
5113 
/// LowerCallResult - Copy the result values of a call out of the physical
/// registers assigned by the calling convention and append them to InVals.
/// Returns the updated chain; InFlag is the glue coming out of the call node
/// and is re-threaded through each CopyFromReg so the copies stay attached
/// to the call.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The cold calling convention on SVR4 uses a dedicated return convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // An SPE f64 result is returned split across two i32 registers that
    // occupy two consecutive RVLocs; consume both here and rebuild the f64.
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      // The register pair is in memory order; swap on big-endian targets so
      // Lo/Hi match BUILD_SPE64's expected operand order.
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any promotion the calling convention applied to the value, adding
    // an Assert node first where the extension kind is known.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5177 
5178 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5179                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5180   // PatchPoint calls are not indirect.
5181   if (isPatchPoint)
5182     return false;
5183 
5184   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5185     return false;
5186 
5187   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5188   // becuase the immediate function pointer points to a descriptor instead of
5189   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5190   // pointer immediate points to the global entry point, while the BLA would
5191   // need to jump to the local entry point (see rL211174).
5192   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5193       isBLACompatibleAddress(Callee, DAG))
5194     return false;
5195 
5196   return true;
5197 }
5198 
5199 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5200 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5201   return Subtarget.isAIXABI() ||
5202          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5203 }
5204 
// Select the PPCISD call opcode for this call: tail calls become TC_RETURN;
// indirect calls become BCTRL (with a TOC restore where the ABI requires it);
// direct calls become CALL, CALL_NOP, or CALL_NOTOC depending on the ABI and
// on whether the caller and callee are known to share a TOC base.
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
                              const Function &Caller,
                              const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM) {
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                               : PPCISD::BCTRL;
  }

  if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    return PPCISD::CALL_NOTOC;
  }

  // The ABIs that maintain a TOC pointer across calls need to have a nop
  // immediately following the call instruction if the caller and callee may
  // have different TOC bases. At link time if the linker determines the calls
  // may not share a TOC base, the call is redirected to a trampoline inserted
  // by the linker. The trampoline will (among other things) save the callers
  // TOC pointer at an ABI designated offset in the linkage area and the linker
  // will rewrite the nop to be a load of the TOC pointer from the linkage area
  // into gpr2.
  if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
    return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
                                                  : PPCISD::CALL_NOP;

  return PPCISD::CALL;
}
5246 
// Rewrite a direct call's callee operand into its final, target-specific
// form: an absolute BLA immediate, an AIX function entry-point MCSymbol, a
// TargetGlobalAddress, or a TargetExternalSymbol (with PLT decoration for
// 32-bit ELF PIC). Returns the callee unchanged if none of these apply.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  // Absolute branch targets are only usable on the non-descriptor, non-ELFv2
  // ABIs (see isIndirectCall for the rationale).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // IFuncs resolve at runtime, so they are never considered local here.
    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !dyn_cast_or_null<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;

  // On AIX a direct call targets the function's entry-point symbol rather
  // than the function descriptor symbol.
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

  if (isFunctionGlobalAddress(Callee)) {
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
            SectionKind::getMetadata());
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5324 
5325 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5326   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5327          "Expected a CALLSEQ_STARTSDNode.");
5328 
5329   // The last operand is the chain, except when the node has glue. If the node
5330   // has glue, then the last operand is the glue, and the chain is the second
5331   // last operand.
5332   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5333   if (LastValue.getValueType() != MVT::Glue)
5334     return LastValue;
5335 
5336   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5337 }
5338 
5339 // Creates the node that moves a functions address into the count register
5340 // to prepare for an indirect call instruction.
5341 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5342                                 SDValue &Glue, SDValue &Chain,
5343                                 const SDLoc &dl) {
5344   SDValue MTCTROps[] = {Chain, Callee, Glue};
5345   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5346   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5347                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5348   // The glue is the second value produced.
5349   Glue = Chain.getValue(1);
5350 }
5351 
/// Lower an indirect call through a function descriptor (64-bit ELFv1 / AIX):
/// load the entry point, TOC anchor, and environment pointer out of the
/// descriptor, copy the latter two into their ABI registers, and hand the
/// entry-point address to prepareIndirectCall. Updates Callee/Glue/Chain.
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  // If descriptors are invariant, the loads can be marked dereferenceable
  // and invariant so they may be hoisted/CSE'd.
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                      ? (MachineMemOperand::MODereferenceable |
                         MachineMemOperand::MOInvariant)
                      : MachineMemOperand::MONone;

  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5443 
/// Assemble the operand list for the call node in the ABI-mandated order:
/// chain, callee (or the TOC-restore / CTR operands for indirect calls), the
/// tail-call stack delta, the argument registers, implicit register uses,
/// the call-preserved register mask, and finally the glue (if any).
static void
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
       !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5526 
// FinishCall - Common tail of every PPC call-lowering path. Resolves the
// callee into its final form, builds the operand list for the call node, and
// then emits either a TC_RETURN node (tail calls) or a call node followed by
// CALLSEQ_END, lowering the returned values into InVals for ordinary calls.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-based ABIs (64-bit ELF without PC-relative addressing, and AIX)
  // need the TOC base pointer tracked for this function.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget());

  // Put the callee into its final form: direct callees are rewritten by
  // transformCallee; indirect calls go either through a function descriptor
  // (where the ABI uses them) or a plain register-indirect sequence.
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
  }

  // Ordinary call: the node produces a chain and a glue value; the glue
  // threads into CALLSEQ_END and the copies out of the return registers.
  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                         getTargetMachine().Options.GuaranteedTailCallOpt)
                            ? NumBytes
                            : 0;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5596 
// LowerCall - Entry point for lowering an outgoing call. Decides tail-call
// eligibility, assembles the CallFlags, and dispatches to the appropriate
// ABI-specific lowering routine (64-bit SVR4, 32-bit SVR4, AIX, or Darwin).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG                     = CLI.DAG;
  SDLoc &dl                             = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
  SDValue Chain                         = CLI.Chain;
  SDValue Callee                        = CLI.Callee;
  bool &isTailCall                      = CLI.IsTailCall;
  CallingConv::ID CallConv              = CLI.CallConv;
  bool isVarArg                         = CLI.IsVarArg;
  bool isPatchPoint                     = CLI.IsPatchPoint;
  const CallBase *CB                    = CLI.CB;

  // Refine the front end's tail-call request: long-call mode disables it
  // (unless the call site is musttail), otherwise apply the ABI-specific
  // eligibility check.
  if (isTailCall) {
    if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall = IsEligibleForTailCallOptimization_64SVR4(
          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  // musttail call sites must be tail-called; failing to do so is fatal.
  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch to the ABI-specific lowering routine.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);

  if (Subtarget.isSVR4ABI())
    return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5677 
// LowerCall_32SVR4 - Lower an outgoing call for the 32-bit SVR4 ABI:
// analyze the argument assignments (including by-value aggregates), adjust
// the stack, copy arguments into registers or parameter-area stack slots,
// handle the vararg CR6 convention, and finish via FinishCall.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  // 32-bit ABI: pointers are 4 bytes, so stack slots are 4-byte aligned.
  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      // A true result from the CC function means the type was not handled.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
             << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      // SPE passes f64 in a pair of i32 GPRs: extract both 32-bit halves and
      // consume two consecutive register assignments (ArgLocs[i] and
      // ArgLocs[++i]); half order depends on endianness.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                             SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    // Only pass the glue operand if one exists (first call has no glue yet).
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
5914 
5915 // Copy an argument into memory, being careful to do this outside the
5916 // call sequence for the call to which the argument belongs.
5917 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5918     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5919     SelectionDAG &DAG, const SDLoc &dl) const {
5920   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5921                         CallSeqStart.getNode()->getOperand(0),
5922                         Flags, DAG, dl);
5923   // The MEMCPY must go outside the CALLSEQ_START..END.
5924   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5925   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5926                                                  SDLoc(MemcpyCall));
5927   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5928                          NewCallSeqStart.getNode());
5929   return NewCallSeqStart;
5930 }
5931 
5932 SDValue PPCTargetLowering::LowerCall_64SVR4(
5933     SDValue Chain, SDValue Callee, CallFlags CFlags,
5934     const SmallVectorImpl<ISD::OutputArg> &Outs,
5935     const SmallVectorImpl<SDValue> &OutVals,
5936     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5937     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5938     const CallBase *CB) const {
5939   bool isELFv2ABI = Subtarget.isELFv2ABI();
5940   bool isLittleEndian = Subtarget.isLittleEndian();
5941   unsigned NumOps = Outs.size();
5942   bool IsSibCall = false;
5943   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5944 
5945   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5946   unsigned PtrByteSize = 8;
5947 
5948   MachineFunction &MF = DAG.getMachineFunction();
5949 
5950   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5951     IsSibCall = true;
5952 
5953   // Mark this function as potentially containing a function that contains a
5954   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5955   // and restoring the callers stack pointer in this functions epilog. This is
5956   // done because by tail calling the called function might overwrite the value
5957   // in this function's (MF) stack pointer stack slot 0(SP).
5958   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5959     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5960 
5961   assert(!(IsFastCall && CFlags.IsVarArg) &&
5962          "fastcc not supported on varargs functions");
5963 
5964   // Count how many bytes are to be pushed on the stack, including the linkage
5965   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5966   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5967   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5968   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5969   unsigned NumBytes = LinkageSize;
5970   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5971 
5972   static const MCPhysReg GPR[] = {
5973     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5974     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5975   };
5976   static const MCPhysReg VR[] = {
5977     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5978     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5979   };
5980 
5981   const unsigned NumGPRs = array_lengthof(GPR);
5982   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5983   const unsigned NumVRs  = array_lengthof(VR);
5984 
5985   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5986   // can be passed to the callee in registers.
5987   // For the fast calling convention, there is another check below.
5988   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5989   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5990   if (!HasParameterArea) {
5991     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5992     unsigned AvailableFPRs = NumFPRs;
5993     unsigned AvailableVRs = NumVRs;
5994     unsigned NumBytesTmp = NumBytes;
5995     for (unsigned i = 0; i != NumOps; ++i) {
5996       if (Outs[i].Flags.isNest()) continue;
5997       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5998                                  PtrByteSize, LinkageSize, ParamAreaSize,
5999                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
6000         HasParameterArea = true;
6001     }
6002   }
6003 
6004   // When using the fast calling convention, we don't provide backing for
6005   // arguments that will be in registers.
6006   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6007 
6008   // Avoid allocating parameter area for fastcc functions if all the arguments
6009   // can be passed in the registers.
6010   if (IsFastCall)
6011     HasParameterArea = false;
6012 
6013   // Add up all the space actually used.
6014   for (unsigned i = 0; i != NumOps; ++i) {
6015     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6016     EVT ArgVT = Outs[i].VT;
6017     EVT OrigVT = Outs[i].ArgVT;
6018 
6019     if (Flags.isNest())
6020       continue;
6021 
6022     if (IsFastCall) {
6023       if (Flags.isByVal()) {
6024         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6025         if (NumGPRsUsed > NumGPRs)
6026           HasParameterArea = true;
6027       } else {
6028         switch (ArgVT.getSimpleVT().SimpleTy) {
6029         default: llvm_unreachable("Unexpected ValueType for argument!");
6030         case MVT::i1:
6031         case MVT::i32:
6032         case MVT::i64:
6033           if (++NumGPRsUsed <= NumGPRs)
6034             continue;
6035           break;
6036         case MVT::v4i32:
6037         case MVT::v8i16:
6038         case MVT::v16i8:
6039         case MVT::v2f64:
6040         case MVT::v2i64:
6041         case MVT::v1i128:
6042         case MVT::f128:
6043           if (++NumVRsUsed <= NumVRs)
6044             continue;
6045           break;
6046         case MVT::v4f32:
6047           if (++NumVRsUsed <= NumVRs)
6048             continue;
6049           break;
6050         case MVT::f32:
6051         case MVT::f64:
6052           if (++NumFPRsUsed <= NumFPRs)
6053             continue;
6054           break;
6055         }
6056         HasParameterArea = true;
6057       }
6058     }
6059 
6060     /* Respect alignment of argument on the stack.  */
6061     auto Alignement =
6062         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6063     NumBytes = alignTo(NumBytes, Alignement);
6064 
6065     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6066     if (Flags.isInConsecutiveRegsLast())
6067       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6068   }
6069 
6070   unsigned NumBytesActuallyUsed = NumBytes;
6071 
6072   // In the old ELFv1 ABI,
6073   // the prolog code of the callee may store up to 8 GPR argument registers to
6074   // the stack, allowing va_start to index over them in memory if its varargs.
6075   // Because we cannot tell if this is needed on the caller side, we have to
6076   // conservatively assume that it is needed.  As such, make sure we have at
6077   // least enough stack space for the caller to store the 8 GPRs.
6078   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6079   // really requires memory operands, e.g. a vararg function.
6080   if (HasParameterArea)
6081     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6082   else
6083     NumBytes = LinkageSize;
6084 
6085   // Tail call needs the stack to be aligned.
6086   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6087     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6088 
6089   int SPDiff = 0;
6090 
6091   // Calculate by how many bytes the stack has to be adjusted in case of tail
6092   // call optimization.
6093   if (!IsSibCall)
6094     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6095 
6096   // To protect arguments on the stack from being clobbered in a tail call,
6097   // force all the loads to happen before doing any other lowering.
6098   if (CFlags.IsTailCall)
6099     Chain = DAG.getStackArgumentTokenFactor(Chain);
6100 
6101   // Adjust the stack pointer for the new arguments...
6102   // These operations are automatically eliminated by the prolog/epilog pass
6103   if (!IsSibCall)
6104     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6105   SDValue CallSeqStart = Chain;
6106 
6107   // Load the return address and frame pointer so it can be move somewhere else
6108   // later.
6109   SDValue LROp, FPOp;
6110   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6111 
6112   // Set up a copy of the stack pointer for use loading and storing any
6113   // arguments that may not fit in the registers available for argument
6114   // passing.
6115   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6116 
6117   // Figure out which arguments are going to go in registers, and which in
6118   // memory.  Also, if this is a vararg function, floating point operations
6119   // must be stored to our stack, and loaded into integer regs as well, if
6120   // any integer regs are available for argument passing.
6121   unsigned ArgOffset = LinkageSize;
6122 
6123   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6124   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6125 
6126   SmallVector<SDValue, 8> MemOpChains;
6127   for (unsigned i = 0; i != NumOps; ++i) {
6128     SDValue Arg = OutVals[i];
6129     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6130     EVT ArgVT = Outs[i].VT;
6131     EVT OrigVT = Outs[i].ArgVT;
6132 
6133     // PtrOff will be used to store the current argument to the stack if a
6134     // register cannot be found for it.
6135     SDValue PtrOff;
6136 
6137     // We re-align the argument offset for each argument, except when using the
6138     // fast calling convention, when we need to make sure we do that only when
6139     // we'll actually use a stack slot.
6140     auto ComputePtrOff = [&]() {
6141       /* Respect alignment of argument on the stack.  */
6142       auto Alignment =
6143           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6144       ArgOffset = alignTo(ArgOffset, Alignment);
6145 
6146       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6147 
6148       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6149     };
6150 
6151     if (!IsFastCall) {
6152       ComputePtrOff();
6153 
6154       /* Compute GPR index associated with argument offset.  */
6155       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6156       GPR_idx = std::min(GPR_idx, NumGPRs);
6157     }
6158 
6159     // Promote integers to 64-bit values.
6160     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6161       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6162       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6163       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6164     }
6165 
6166     // FIXME memcpy is used way more than necessary.  Correctness first.
6167     // Note: "by value" is code for passing a structure by value, not
6168     // basic types.
6169     if (Flags.isByVal()) {
6170       // Note: Size includes alignment padding, so
6171       //   struct x { short a; char b; }
6172       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6173       // These are the proper values we need for right-justifying the
6174       // aggregate in a parameter register.
6175       unsigned Size = Flags.getByValSize();
6176 
6177       // An empty aggregate parameter takes up no storage and no
6178       // registers.
6179       if (Size == 0)
6180         continue;
6181 
6182       if (IsFastCall)
6183         ComputePtrOff();
6184 
6185       // All aggregates smaller than 8 bytes must be passed right-justified.
6186       if (Size==1 || Size==2 || Size==4) {
6187         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6188         if (GPR_idx != NumGPRs) {
6189           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6190                                         MachinePointerInfo(), VT);
6191           MemOpChains.push_back(Load.getValue(1));
6192           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6193 
6194           ArgOffset += PtrByteSize;
6195           continue;
6196         }
6197       }
6198 
6199       if (GPR_idx == NumGPRs && Size < 8) {
6200         SDValue AddPtr = PtrOff;
6201         if (!isLittleEndian) {
6202           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6203                                           PtrOff.getValueType());
6204           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6205         }
6206         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6207                                                           CallSeqStart,
6208                                                           Flags, DAG, dl);
6209         ArgOffset += PtrByteSize;
6210         continue;
6211       }
6212       // Copy entire object into memory.  There are cases where gcc-generated
6213       // code assumes it is there, even if it could be put entirely into
6214       // registers.  (This is not what the doc says.)
6215 
6216       // FIXME: The above statement is likely due to a misunderstanding of the
6217       // documents.  All arguments must be copied into the parameter area BY
6218       // THE CALLEE in the event that the callee takes the address of any
6219       // formal argument.  That has not yet been implemented.  However, it is
6220       // reasonable to use the stack area as a staging area for the register
6221       // load.
6222 
6223       // Skip this for small aggregates, as we will use the same slot for a
6224       // right-justified copy, below.
6225       if (Size >= 8)
6226         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6227                                                           CallSeqStart,
6228                                                           Flags, DAG, dl);
6229 
6230       // When a register is available, pass a small aggregate right-justified.
6231       if (Size < 8 && GPR_idx != NumGPRs) {
6232         // The easiest way to get this right-justified in a register
6233         // is to copy the structure into the rightmost portion of a
6234         // local variable slot, then load the whole slot into the
6235         // register.
6236         // FIXME: The memcpy seems to produce pretty awful code for
6237         // small aggregates, particularly for packed ones.
6238         // FIXME: It would be preferable to use the slot in the
6239         // parameter save area instead of a new local variable.
6240         SDValue AddPtr = PtrOff;
6241         if (!isLittleEndian) {
6242           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6243           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6244         }
6245         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6246                                                           CallSeqStart,
6247                                                           Flags, DAG, dl);
6248 
6249         // Load the slot into the register.
6250         SDValue Load =
6251             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6252         MemOpChains.push_back(Load.getValue(1));
6253         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6254 
6255         // Done with this argument.
6256         ArgOffset += PtrByteSize;
6257         continue;
6258       }
6259 
6260       // For aggregates larger than PtrByteSize, copy the pieces of the
6261       // object that fit into registers from the parameter save area.
6262       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6263         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6264         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6265         if (GPR_idx != NumGPRs) {
6266           SDValue Load =
6267               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6268           MemOpChains.push_back(Load.getValue(1));
6269           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6270           ArgOffset += PtrByteSize;
6271         } else {
6272           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6273           break;
6274         }
6275       }
6276       continue;
6277     }
6278 
6279     switch (Arg.getSimpleValueType().SimpleTy) {
6280     default: llvm_unreachable("Unexpected ValueType for argument!");
6281     case MVT::i1:
6282     case MVT::i32:
6283     case MVT::i64:
6284       if (Flags.isNest()) {
6285         // The 'nest' parameter, if any, is passed in R11.
6286         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6287         break;
6288       }
6289 
6290       // These can be scalar arguments or elements of an integer array type
6291       // passed directly.  Clang may use those instead of "byval" aggregate
6292       // types to avoid forcing arguments to memory unnecessarily.
6293       if (GPR_idx != NumGPRs) {
6294         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6295       } else {
6296         if (IsFastCall)
6297           ComputePtrOff();
6298 
6299         assert(HasParameterArea &&
6300                "Parameter area must exist to pass an argument in memory.");
6301         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6302                          true, CFlags.IsTailCall, false, MemOpChains,
6303                          TailCallArguments, dl);
6304         if (IsFastCall)
6305           ArgOffset += PtrByteSize;
6306       }
6307       if (!IsFastCall)
6308         ArgOffset += PtrByteSize;
6309       break;
6310     case MVT::f32:
6311     case MVT::f64: {
6312       // These can be scalar arguments or elements of a float array type
6313       // passed directly.  The latter are used to implement ELFv2 homogenous
6314       // float aggregates.
6315 
6316       // Named arguments go into FPRs first, and once they overflow, the
6317       // remaining arguments go into GPRs and then the parameter save area.
6318       // Unnamed arguments for vararg functions always go to GPRs and
6319       // then the parameter save area.  For now, put all arguments to vararg
6320       // routines always in both locations (FPR *and* GPR or stack slot).
6321       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6322       bool NeededLoad = false;
6323 
6324       // First load the argument into the next available FPR.
6325       if (FPR_idx != NumFPRs)
6326         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6327 
6328       // Next, load the argument into GPR or stack slot if needed.
6329       if (!NeedGPROrStack)
6330         ;
6331       else if (GPR_idx != NumGPRs && !IsFastCall) {
6332         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6333         // once we support fp <-> gpr moves.
6334 
6335         // In the non-vararg case, this can only ever happen in the
6336         // presence of f32 array types, since otherwise we never run
6337         // out of FPRs before running out of GPRs.
6338         SDValue ArgVal;
6339 
6340         // Double values are always passed in a single GPR.
6341         if (Arg.getValueType() != MVT::f32) {
6342           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6343 
6344         // Non-array float values are extended and passed in a GPR.
6345         } else if (!Flags.isInConsecutiveRegs()) {
6346           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6347           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6348 
6349         // If we have an array of floats, we collect every odd element
6350         // together with its predecessor into one GPR.
6351         } else if (ArgOffset % PtrByteSize != 0) {
6352           SDValue Lo, Hi;
6353           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6354           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6355           if (!isLittleEndian)
6356             std::swap(Lo, Hi);
6357           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6358 
6359         // The final element, if even, goes into the first half of a GPR.
6360         } else if (Flags.isInConsecutiveRegsLast()) {
6361           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6362           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6363           if (!isLittleEndian)
6364             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6365                                  DAG.getConstant(32, dl, MVT::i32));
6366 
6367         // Non-final even elements are skipped; they will be handled
6368         // together the with subsequent argument on the next go-around.
6369         } else
6370           ArgVal = SDValue();
6371 
6372         if (ArgVal.getNode())
6373           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6374       } else {
6375         if (IsFastCall)
6376           ComputePtrOff();
6377 
6378         // Single-precision floating-point values are mapped to the
6379         // second (rightmost) word of the stack doubleword.
6380         if (Arg.getValueType() == MVT::f32 &&
6381             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6382           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6383           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6384         }
6385 
6386         assert(HasParameterArea &&
6387                "Parameter area must exist to pass an argument in memory.");
6388         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6389                          true, CFlags.IsTailCall, false, MemOpChains,
6390                          TailCallArguments, dl);
6391 
6392         NeededLoad = true;
6393       }
6394       // When passing an array of floats, the array occupies consecutive
6395       // space in the argument area; only round up to the next doubleword
6396       // at the end of the array.  Otherwise, each float takes 8 bytes.
6397       if (!IsFastCall || NeededLoad) {
6398         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6399                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6400         if (Flags.isInConsecutiveRegsLast())
6401           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6402       }
6403       break;
6404     }
6405     case MVT::v4f32:
6406     case MVT::v4i32:
6407     case MVT::v8i16:
6408     case MVT::v16i8:
6409     case MVT::v2f64:
6410     case MVT::v2i64:
6411     case MVT::v1i128:
6412     case MVT::f128:
6413       // These can be scalar arguments or elements of a vector array type
6414       // passed directly.  The latter are used to implement ELFv2 homogenous
6415       // vector aggregates.
6416 
6417       // For a varargs call, named arguments go into VRs or on the stack as
6418       // usual; unnamed arguments always go to the stack or the corresponding
6419       // GPRs when within range.  For now, we always put the value in both
6420       // locations (or even all three).
6421       if (CFlags.IsVarArg) {
6422         assert(HasParameterArea &&
6423                "Parameter area must exist if we have a varargs call.");
6424         // We could elide this store in the case where the object fits
6425         // entirely in R registers.  Maybe later.
6426         SDValue Store =
6427             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6428         MemOpChains.push_back(Store);
6429         if (VR_idx != NumVRs) {
6430           SDValue Load =
6431               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6432           MemOpChains.push_back(Load.getValue(1));
6433           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6434         }
6435         ArgOffset += 16;
6436         for (unsigned i=0; i<16; i+=PtrByteSize) {
6437           if (GPR_idx == NumGPRs)
6438             break;
6439           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6440                                    DAG.getConstant(i, dl, PtrVT));
6441           SDValue Load =
6442               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6443           MemOpChains.push_back(Load.getValue(1));
6444           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6445         }
6446         break;
6447       }
6448 
6449       // Non-varargs Altivec params go into VRs or on the stack.
6450       if (VR_idx != NumVRs) {
6451         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6452       } else {
6453         if (IsFastCall)
6454           ComputePtrOff();
6455 
6456         assert(HasParameterArea &&
6457                "Parameter area must exist to pass an argument in memory.");
6458         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6459                          true, CFlags.IsTailCall, true, MemOpChains,
6460                          TailCallArguments, dl);
6461         if (IsFastCall)
6462           ArgOffset += 16;
6463       }
6464 
6465       if (!IsFastCall)
6466         ArgOffset += 16;
6467       break;
6468     }
6469   }
6470 
6471   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6472          "mismatch in size of parameter area");
6473   (void)NumBytesActuallyUsed;
6474 
6475   if (!MemOpChains.empty())
6476     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6477 
6478   // Check if this is an indirect call (MTCTR/BCTRL).
6479   // See prepareDescriptorIndirectCall and buildCallOperands for more
6480   // information about calls through function pointers in the 64-bit SVR4 ABI.
6481   if (CFlags.IsIndirect) {
6482     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6483     // caller in the TOC save area.
6484     if (isTOCSaveRestoreRequired(Subtarget)) {
6485       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6486       // Load r2 into a virtual register and store it to the TOC save area.
6487       setUsesTOCBasePtr(DAG);
6488       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6489       // TOC save area offset.
6490       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6491       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6492       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6493       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6494                            MachinePointerInfo::getStack(
6495                                DAG.getMachineFunction(), TOCSaveOffset));
6496     }
6497     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6498     // This does not mean the MTCTR instruction must use R12; it's easier
6499     // to model this as an extra parameter, so do that.
6500     if (isELFv2ABI && !CFlags.IsPatchPoint)
6501       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6502   }
6503 
6504   // Build a sequence of copy-to-reg nodes chained together with token chain
6505   // and flag operands which copy the outgoing args into the appropriate regs.
6506   SDValue InFlag;
6507   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6508     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6509                              RegsToPass[i].second, InFlag);
6510     InFlag = Chain.getValue(1);
6511   }
6512 
6513   if (CFlags.IsTailCall && !IsSibCall)
6514     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6515                     TailCallArguments);
6516 
6517   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6518                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6519 }
6520 
/// Lower an outgoing call according to the 32/64-bit Darwin PowerPC ABI:
/// assign arguments to GPRs/FPRs/VRs in order, spill whatever does not fit
/// into the parameter save area on the stack, and emit the final call
/// sequence via FinishCall.
///
/// Darwin-specific behavior implemented below:
///  * FP arguments consume ("shadow") GPR slots even when they are passed in
///    FPRs (one GPR for f32, two for f64 on 32-bit targets).
///  * In 32-bit non-varargs calls, Altivec vector arguments are placed after
///    all non-Altivec parameters; stack space for them is reserved at the
///    end, 16-byte aligned.
///  * For varargs calls, FP and vector arguments are passed in registers
///    *and* stored to the stack, and are mirrored into GPRs when available,
///    so the callee can retrieve them either way.
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CFlags.CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!CFlags.IsVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CFlags.CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (CFlags.IsTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be move somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  // Number of FP argument registers.  The FPR register array itself is
  // declared elsewhere in this file (not visible in this function).
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          // A GPR is available: load the 1/2-byte object directly from the
          // source and pass it extended to pointer width in the register.
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          // No GPR left: memcpy the object into the rightmost bytes of its
          // pointer-sized stack slot (right justification).
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the object stays in memory only; account
          // for its remaining (rounded-up) size and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        // i1 is zero-extended to pointer width before it goes in a GPR.
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (CFlags.IsVarArg) {
          // Varargs FP arguments are also stored to the stack so va_arg can
          // find them, regardless of register availability.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          // On 32-bit, the second word of an f64 also occupies a GPR when one
          // is available; reload it from the stack slot at offset +4.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      // On 64-bit, every FP argument occupies a full doubleword slot; on
      // 32-bit, f32 takes 4 bytes and f64 takes 8.
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (CFlags.IsVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          // Pad to 16-byte alignment; each skipped pointer-sized slot also
          // consumes a shadowing GPR.
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          // Reload the stored value into a VR.  v4f32 is used as the reload
          // type regardless of the vector's original element type.
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also mirror the 16-byte value into as many GPRs as remain.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          // NOTE(review): PtrOff is a default-constructed (null) SDValue
          // here; this presumably relies on LowerMemOpCallTo not using the
          // pointer on this path -- verify against its implementation.
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, CFlags.IsTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  // Glue all the pending stores/loads together into a single chain so the
  // call depends on every argument memory operation.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (CFlags.IsTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6898 
// CC_AIX - Custom calling-convention assignment function for the AIX ABI.
// Assigns the argument described by ValVT/ArgFlags to registers and/or to a
// slot in the parameter save area (PSA), recording each decision in State.
// Returns false once the argument has been assigned (the CCAssignFn
// convention for "handled"); argument classes that are not implemented yet
// abort compilation via report_fatal_error.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &State) {

  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  // PSA slots are register-width: 8 bytes on PPC64, 4 bytes on PPC32.
  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  assert((!ValVT.isInteger() ||
          (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
         "Integer argument exceeds register size: should have been legalized");

  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  if (ValVT.isVector() || LocVT.isVector())
    report_fatal_error("Vector arguments are unimplemented on AIX.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  if (ArgFlags.isByVal()) {
    if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "register width are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                       State.getNextStackOffset(), RegVT,
                                       LocInfo));
      return false;
    }

    // Reserve the aggregate's full (register-width aligned) PSA footprint up
    // front, then hand out one RegLoc per register-width piece while GPRs
    // remain.  All of these locs share this argument's ValNo.  Once GPRs run
    // out, a single trailing MemLoc covers the rest of the aggregate.
    const unsigned StackSize = alignTo(ByValSize, PtrAlign);
    unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
    for (const unsigned E = Offset + StackSize; Offset < E;
         Offset += PtrAlign.value()) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
                                         LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    LLVM_FALLTHROUGH;
  case MVT::i1:
  case MVT::i32: {
    // The PSA slot is reserved whether or not the value lands in a register.
    const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
    unsigned FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          //   f64 in PPC32 needs to be split into 2 GPRs.
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
  }
  // Not reached: every case in the switch above either returns or reports a
  // fatal error.
  return true;
}
7027 
7028 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7029                                                     bool IsPPC64) {
7030   assert((IsPPC64 || SVT != MVT::i64) &&
7031          "i64 should have been split for 32-bit codegen.");
7032 
7033   switch (SVT) {
7034   default:
7035     report_fatal_error("Unexpected value type for formal argument");
7036   case MVT::i1:
7037   case MVT::i32:
7038   case MVT::i64:
7039     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7040   case MVT::f32:
7041     return &PPC::F4RCRegClass;
7042   case MVT::f64:
7043     return &PPC::F8RCRegClass;
7044   }
7045 }
7046 
7047 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7048                                         SelectionDAG &DAG, SDValue ArgValue,
7049                                         MVT LocVT, const SDLoc &dl) {
7050   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7051   assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
7052 
7053   if (Flags.isSExt())
7054     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7055                            DAG.getValueType(ValVT));
7056   else if (Flags.isZExt())
7057     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7058                            DAG.getValueType(ValVT));
7059 
7060   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7061 }
7062 
7063 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7064   const unsigned LASize = FL->getLinkageSize();
7065 
7066   if (PPC::GPRCRegClass.contains(Reg)) {
7067     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7068            "Reg must be a valid argument register!");
7069     return LASize + 4 * (Reg - PPC::R3);
7070   }
7071 
7072   if (PPC::G8RCRegClass.contains(Reg)) {
7073     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7074            "Reg must be a valid argument register!");
7075     return LASize + 8 * (Reg - PPC::X3);
7076   }
7077 
7078   llvm_unreachable("Only general purpose registers expected.");
7079 }
7080 
7081 //   AIX ABI Stack Frame Layout:
7082 //
7083 //   Low Memory +--------------------------------------------+
7084 //   SP   +---> | Back chain                                 | ---+
7085 //        |     +--------------------------------------------+    |
7086 //        |     | Saved Condition Register                   |    |
7087 //        |     +--------------------------------------------+    |
7088 //        |     | Saved Linkage Register                     |    |
7089 //        |     +--------------------------------------------+    | Linkage Area
7090 //        |     | Reserved for compilers                     |    |
7091 //        |     +--------------------------------------------+    |
7092 //        |     | Reserved for binders                       |    |
7093 //        |     +--------------------------------------------+    |
7094 //        |     | Saved TOC pointer                          | ---+
7095 //        |     +--------------------------------------------+
7096 //        |     | Parameter save area                        |
7097 //        |     +--------------------------------------------+
7098 //        |     | Alloca space                               |
7099 //        |     +--------------------------------------------+
7100 //        |     | Local variable space                       |
7101 //        |     +--------------------------------------------+
7102 //        |     | Float/int conversion temporary             |
7103 //        |     +--------------------------------------------+
7104 //        |     | Save area for AltiVec registers            |
7105 //        |     +--------------------------------------------+
7106 //        |     | AltiVec alignment padding                  |
7107 //        |     +--------------------------------------------+
7108 //        |     | Save area for VRSAVE register              |
7109 //        |     +--------------------------------------------+
7110 //        |     | Save area for General Purpose registers    |
7111 //        |     +--------------------------------------------+
7112 //        |     | Save area for Floating Point registers     |
7113 //        |     +--------------------------------------------+
7114 //        +---- | Back chain                                 |
7115 // High Memory  +--------------------------------------------+
7116 //
7117 //  Specifications:
7118 //  AIX 7.2 Assembler Language Reference
7119 //  Subroutine linkage convention
7120 
/// LowerFormalArguments_AIX - Lower incoming arguments for the AIX ABI (see
/// the stack-frame diagram above).  Produces the DAG values for each formal
/// argument in InVals: register arguments become CopyFromReg nodes, by-value
/// aggregates are spilled to fixed stack objects whose address is the InVal,
/// and memory arguments become loads from their fixed PSA slots.  For vararg
/// functions the remaining GPR argument registers are also stored out so
/// va_arg can walk them in memory.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget =
      static_cast<const PPCSubtarget &>(DAG.getSubtarget());

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  SmallVector<SDValue, 8> MemOps;

  // Walk the locations produced by CC_AIX.  The index is advanced manually
  // because a single by-value argument may own several consecutive locs.
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;

    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register.  The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom())
      continue;

    // By-value argument that lives (at least partly) in memory: the InVal is
    // simply the address of its fixed stack slot in the PSA.
    if (Flags.isByVal() && VA.isMemLoc()) {
      const unsigned Size =
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      continue;
    }

    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      if (Flags.getNonZeroByValAlign() > PtrByteSize)
        report_fatal_error("Over aligned byvals not supported yet.");

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixedstack object is needed because accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        SDValue Store = DAG.getStore(
            CopyFrom.getValue(1), dl, CopyFrom,
            DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
            MachinePointerInfo::getFixedStack(MF, FI, Offset));

        MemOps.push_back(Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      // Spill any further registers holding pieces of this same aggregate.
      // NOTE(review): reading ArgLocs[I] here relies on CC_AIX emitting a
      // trailing MemLoc whenever the aggregate does not fit entirely in
      // registers, so another loc for this ByVal must exist when
      // Offset != StackSize — confirm if CC_AIX changes.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    EVT ValVT = VA.getValVT();
    // Plain register argument: create a live-in vreg and narrow the value
    // back to its declared type if CC_AIX promoted it to register width.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
      if (ValVT.isScalarInteger() &&
          (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(ArgValue);
      continue;
    }
    if (VA.isMemLoc()) {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      const bool IsImmutable =
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
            (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const unsigned VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
7336 
7337 SDValue PPCTargetLowering::LowerCall_AIX(
7338     SDValue Chain, SDValue Callee, CallFlags CFlags,
7339     const SmallVectorImpl<ISD::OutputArg> &Outs,
7340     const SmallVectorImpl<SDValue> &OutVals,
7341     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7342     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7343     const CallBase *CB) const {
7344   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7345   // AIX ABI stack frame layout.
7346 
7347   assert((CFlags.CallConv == CallingConv::C ||
7348           CFlags.CallConv == CallingConv::Cold ||
7349           CFlags.CallConv == CallingConv::Fast) &&
7350          "Unexpected calling convention!");
7351 
7352   if (CFlags.IsPatchPoint)
7353     report_fatal_error("This call type is unimplemented on AIX.");
7354 
7355   const PPCSubtarget& Subtarget =
7356       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7357   if (Subtarget.hasAltivec())
7358     report_fatal_error("Altivec support is unimplemented on AIX.");
7359 
7360   MachineFunction &MF = DAG.getMachineFunction();
7361   SmallVector<CCValAssign, 16> ArgLocs;
7362   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7363                  *DAG.getContext());
7364 
7365   // Reserve space for the linkage save area (LSA) on the stack.
7366   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7367   //   [SP][CR][LR][2 x reserved][TOC].
7368   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7369   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7370   const bool IsPPC64 = Subtarget.isPPC64();
7371   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7372   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7373   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7374   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7375 
7376   // The prolog code of the callee may store up to 8 GPR argument registers to
7377   // the stack, allowing va_start to index over them in memory if the callee
7378   // is variadic.
7379   // Because we cannot tell if this is needed on the caller side, we have to
7380   // conservatively assume that it is needed.  As such, make sure we have at
7381   // least enough stack space for the caller to store the 8 GPRs.
7382   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7383   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7384                                      CCInfo.getNextStackOffset());
7385 
7386   // Adjust the stack pointer for the new arguments...
7387   // These operations are automatically eliminated by the prolog/epilog pass.
7388   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7389   SDValue CallSeqStart = Chain;
7390 
7391   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7392   SmallVector<SDValue, 8> MemOpChains;
7393 
7394   // Set up a copy of the stack pointer for loading and storing any
7395   // arguments that may not fit in the registers available for argument
7396   // passing.
7397   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7398                                    : DAG.getRegister(PPC::R1, MVT::i32);
7399 
7400   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7401     const unsigned ValNo = ArgLocs[I].getValNo();
7402     SDValue Arg = OutVals[ValNo];
7403     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7404 
7405     if (Flags.isByVal()) {
7406       const unsigned ByValSize = Flags.getByValSize();
7407 
7408       // Nothing to do for zero-sized ByVals on the caller side.
7409       if (!ByValSize) {
7410         ++I;
7411         continue;
7412       }
7413 
7414       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7415         return DAG.getExtLoad(
7416             ISD::ZEXTLOAD, dl, PtrVT, Chain,
7417             (LoadOffset != 0)
7418                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7419                 : Arg,
7420             MachinePointerInfo(), VT);
7421       };
7422 
7423       unsigned LoadOffset = 0;
7424 
7425       // Initialize registers, which are fully occupied by the by-val argument.
7426       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7427         SDValue Load = GetLoad(PtrVT, LoadOffset);
7428         MemOpChains.push_back(Load.getValue(1));
7429         LoadOffset += PtrByteSize;
7430         const CCValAssign &ByValVA = ArgLocs[I++];
7431         assert(ByValVA.getValNo() == ValNo &&
7432                "Unexpected location for pass-by-value argument.");
7433         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7434       }
7435 
7436       if (LoadOffset == ByValSize)
7437         continue;
7438 
7439       // There must be one more loc to handle the remainder.
7440       assert(ArgLocs[I].getValNo() == ValNo &&
7441              "Expected additional location for by-value argument.");
7442 
7443       if (ArgLocs[I].isMemLoc()) {
7444         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7445         const CCValAssign &ByValVA = ArgLocs[I++];
7446         ISD::ArgFlagsTy MemcpyFlags = Flags;
7447         // Only memcpy the bytes that don't pass in register.
7448         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7449         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7450             (LoadOffset != 0)
7451                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7452                 : Arg,
7453             DAG.getObjectPtrOffset(dl, StackPtr,
7454                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
7455             CallSeqStart, MemcpyFlags, DAG, dl);
7456         continue;
7457       }
7458 
7459       // Initialize the final register residue.
7460       // Any residue that occupies the final by-val arg register must be
7461       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7462       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7463       // 2 and 1 byte loads.
7464       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7465       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7466              "Unexpected register residue for by-value argument.");
7467       SDValue ResidueVal;
7468       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7469         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7470         const MVT VT =
7471             N == 1 ? MVT::i8
7472                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7473         SDValue Load = GetLoad(VT, LoadOffset);
7474         MemOpChains.push_back(Load.getValue(1));
7475         LoadOffset += N;
7476         Bytes += N;
7477 
7478         // By-val arguments are passed left-justfied in register.
7479         // Every load here needs to be shifted, otherwise a full register load
7480         // should have been used.
7481         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7482                "Unexpected load emitted during handling of pass-by-value "
7483                "argument.");
7484         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7485         EVT ShiftAmountTy =
7486             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7487         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7488         SDValue ShiftedLoad =
7489             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7490         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7491                                               ShiftedLoad)
7492                                 : ShiftedLoad;
7493       }
7494 
7495       const CCValAssign &ByValVA = ArgLocs[I++];
7496       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7497       continue;
7498     }
7499 
7500     CCValAssign &VA = ArgLocs[I++];
7501     const MVT LocVT = VA.getLocVT();
7502     const MVT ValVT = VA.getValVT();
7503 
7504     switch (VA.getLocInfo()) {
7505     default:
7506       report_fatal_error("Unexpected argument extension type.");
7507     case CCValAssign::Full:
7508       break;
7509     case CCValAssign::ZExt:
7510       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7511       break;
7512     case CCValAssign::SExt:
7513       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7514       break;
7515     }
7516 
7517     if (VA.isRegLoc() && !VA.needsCustom()) {
7518       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7519       continue;
7520     }
7521 
7522     if (VA.isMemLoc()) {
7523       SDValue PtrOff =
7524           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7525       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7526       MemOpChains.push_back(
7527           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7528 
7529       continue;
7530     }
7531 
7532     // Custom handling is used for GPR initializations for vararg float
7533     // arguments.
7534     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7535            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7536            "Unexpected register handling for calling convention.");
7537 
7538     SDValue ArgAsInt =
7539         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7540 
7541     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7542       // f32 in 32-bit GPR
7543       // f64 in 64-bit GPR
7544       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7545     else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
7546       // f32 in 64-bit GPR.
7547       RegsToPass.push_back(std::make_pair(
7548           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7549     else {
7550       // f64 in two 32-bit GPRs
7551       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7552       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7553              "Unexpected custom register for argument!");
7554       CCValAssign &GPR1 = VA;
7555       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7556                                      DAG.getConstant(32, dl, MVT::i8));
7557       RegsToPass.push_back(std::make_pair(
7558           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7559 
7560       if (I != E) {
7561         // If only 1 GPR was available, there will only be one custom GPR and
7562         // the argument will also pass in memory.
7563         CCValAssign &PeekArg = ArgLocs[I];
7564         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7565           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7566           CCValAssign &GPR2 = ArgLocs[I++];
7567           RegsToPass.push_back(std::make_pair(
7568               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7569         }
7570       }
7571     }
7572   }
7573 
7574   if (!MemOpChains.empty())
7575     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7576 
7577   // For indirect calls, we need to save the TOC base to the stack for
7578   // restoration after the call.
7579   if (CFlags.IsIndirect) {
7580     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7581     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7582     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7583     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7584     const unsigned TOCSaveOffset =
7585         Subtarget.getFrameLowering()->getTOCSaveOffset();
7586 
7587     setUsesTOCBasePtr(DAG);
7588     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7589     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7590     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7591     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7592     Chain = DAG.getStore(
7593         Val.getValue(1), dl, Val, AddPtr,
7594         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7595   }
7596 
7597   // Build a sequence of copy-to-reg nodes chained together with token chain
7598   // and flag operands which copy the outgoing args into the appropriate regs.
7599   SDValue InFlag;
7600   for (auto Reg : RegsToPass) {
7601     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7602     InFlag = Chain.getValue(1);
7603   }
7604 
7605   const int SPDiff = 0;
7606   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7607                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7608 }
7609 
7610 bool
7611 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7612                                   MachineFunction &MF, bool isVarArg,
7613                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7614                                   LLVMContext &Context) const {
7615   SmallVector<CCValAssign, 16> RVLocs;
7616   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7617   return CCInfo.CheckReturn(
7618       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7619                 ? RetCC_PPC_Cold
7620                 : RetCC_PPC);
7621 }
7622 
/// Lower the values in \p OutVals into the physical return registers selected
/// by the return calling convention, producing a PPCISD::RET_FLAG node whose
/// operands are the chain, one register operand per populated return
/// register, and (if any copies were emitted) the trailing glue value.
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  // SVR4 cold calls use the reduced cold return convention; everything else
  // uses the regular PPC return convention.
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  // RealResIdx indexes OutVals; it can lag behind i because an SPE f64
  // result consumes two consecutive RVLocs but only a single OutVal.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    // Promote the value to the register's location type if required.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      // Extract the two 32-bit halves (order depends on endianness) and copy
      // them into the pair of registers assigned by the convention; the
      // second RVLoc is consumed here via the ++i below.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    // Glue successive copies together so they stay adjacent, and record the
    // register as an operand of the return node.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
7687 
7688 SDValue
7689 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7690                                                 SelectionDAG &DAG) const {
7691   SDLoc dl(Op);
7692 
7693   // Get the correct type for integers.
7694   EVT IntVT = Op.getValueType();
7695 
7696   // Get the inputs.
7697   SDValue Chain = Op.getOperand(0);
7698   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7699   // Build a DYNAREAOFFSET node.
7700   SDValue Ops[2] = {Chain, FPSIdx};
7701   SDVTList VTs = DAG.getVTList(IntVT);
7702   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7703 }
7704 
7705 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7706                                              SelectionDAG &DAG) const {
7707   // When we pop the dynamic allocation we need to restore the SP link.
7708   SDLoc dl(Op);
7709 
7710   // Get the correct type for pointers.
7711   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7712 
7713   // Construct the stack pointer operand.
7714   bool isPPC64 = Subtarget.isPPC64();
7715   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7716   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7717 
7718   // Get the operands for the STACKRESTORE.
7719   SDValue Chain = Op.getOperand(0);
7720   SDValue SaveSP = Op.getOperand(1);
7721 
7722   // Load the old link SP.
7723   SDValue LoadLinkSP =
7724       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7725 
7726   // Restore the stack pointer.
7727   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7728 
7729   // Store the old link SP.
7730   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7731 }
7732 
7733 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7734   MachineFunction &MF = DAG.getMachineFunction();
7735   bool isPPC64 = Subtarget.isPPC64();
7736   EVT PtrVT = getPointerTy(MF.getDataLayout());
7737 
7738   // Get current frame pointer save index.  The users of this index will be
7739   // primarily DYNALLOC instructions.
7740   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7741   int RASI = FI->getReturnAddrSaveIndex();
7742 
7743   // If the frame pointer save index hasn't been defined yet.
7744   if (!RASI) {
7745     // Find out what the fix offset of the frame pointer save area.
7746     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7747     // Allocate the frame index for frame pointer save area.
7748     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7749     // Save the result.
7750     FI->setReturnAddrSaveIndex(RASI);
7751   }
7752   return DAG.getFrameIndex(RASI, PtrVT);
7753 }
7754 
7755 SDValue
7756 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7757   MachineFunction &MF = DAG.getMachineFunction();
7758   bool isPPC64 = Subtarget.isPPC64();
7759   EVT PtrVT = getPointerTy(MF.getDataLayout());
7760 
7761   // Get current frame pointer save index.  The users of this index will be
7762   // primarily DYNALLOC instructions.
7763   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7764   int FPSI = FI->getFramePointerSaveIndex();
7765 
7766   // If the frame pointer save index hasn't been defined yet.
7767   if (!FPSI) {
7768     // Find out what the fix offset of the frame pointer save area.
7769     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7770     // Allocate the frame index for frame pointer save area.
7771     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7772     // Save the result.
7773     FI->setFramePointerSaveIndex(FPSI);
7774   }
7775   return DAG.getFrameIndex(FPSI, PtrVT);
7776 }
7777 
7778 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7779                                                    SelectionDAG &DAG) const {
7780   MachineFunction &MF = DAG.getMachineFunction();
7781   // Get the inputs.
7782   SDValue Chain = Op.getOperand(0);
7783   SDValue Size  = Op.getOperand(1);
7784   SDLoc dl(Op);
7785 
7786   // Get the correct type for pointers.
7787   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7788   // Negate the size.
7789   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7790                                 DAG.getConstant(0, dl, PtrVT), Size);
7791   // Construct a node for the frame pointer save index.
7792   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7793   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7794   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7795   if (hasInlineStackProbe(MF))
7796     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7797   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7798 }
7799 
7800 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7801                                                      SelectionDAG &DAG) const {
7802   MachineFunction &MF = DAG.getMachineFunction();
7803 
7804   bool isPPC64 = Subtarget.isPPC64();
7805   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7806 
7807   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7808   return DAG.getFrameIndex(FI, PtrVT);
7809 }
7810 
7811 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7812                                                SelectionDAG &DAG) const {
7813   SDLoc DL(Op);
7814   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7815                      DAG.getVTList(MVT::i32, MVT::Other),
7816                      Op.getOperand(0), Op.getOperand(1));
7817 }
7818 
7819 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7820                                                 SelectionDAG &DAG) const {
7821   SDLoc DL(Op);
7822   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7823                      Op.getOperand(0), Op.getOperand(1));
7824 }
7825 
7826 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7827 
7828   assert(Op.getValueType() == MVT::i1 &&
7829          "Custom lowering only for i1 loads");
7830 
7831   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7832 
7833   SDLoc dl(Op);
7834   LoadSDNode *LD = cast<LoadSDNode>(Op);
7835 
7836   SDValue Chain = LD->getChain();
7837   SDValue BasePtr = LD->getBasePtr();
7838   MachineMemOperand *MMO = LD->getMemOperand();
7839 
7840   SDValue NewLD =
7841       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7842                      BasePtr, MVT::i8, MMO);
7843   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7844 
7845   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7846   return DAG.getMergeValues(Ops, dl);
7847 }
7848 
7849 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7850   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7851          "Custom lowering only for i1 stores");
7852 
7853   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7854 
7855   SDLoc dl(Op);
7856   StoreSDNode *ST = cast<StoreSDNode>(Op);
7857 
7858   SDValue Chain = ST->getChain();
7859   SDValue BasePtr = ST->getBasePtr();
7860   SDValue Value = ST->getValue();
7861   MachineMemOperand *MMO = ST->getMemOperand();
7862 
7863   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7864                       Value);
7865   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7866 }
7867 
7868 // FIXME: Remove this once the ANDI glue bug is fixed:
7869 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7870   assert(Op.getValueType() == MVT::i1 &&
7871          "Custom lowering only for i1 results");
7872 
7873   SDLoc DL(Op);
7874   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7875 }
7876 
7877 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7878                                                SelectionDAG &DAG) const {
7879 
7880   // Implements a vector truncate that fits in a vector register as a shuffle.
7881   // We want to legalize vector truncates down to where the source fits in
7882   // a vector register (and target is therefore smaller than vector register
7883   // size).  At that point legalization will try to custom lower the sub-legal
7884   // result and get here - where we can contain the truncate as a single target
7885   // operation.
7886 
7887   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7888   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7889   //
7890   // We will implement it for big-endian ordering as this (where x denotes
7891   // undefined):
7892   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7893   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7894   //
7895   // The same operation in little-endian ordering will be:
7896   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7897   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7898 
7899   EVT TrgVT = Op.getValueType();
7900   assert(TrgVT.isVector() && "Vector type expected.");
7901   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7902   EVT EltVT = TrgVT.getVectorElementType();
7903   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7904       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7905       !isPowerOf2_32(EltVT.getSizeInBits()))
7906     return SDValue();
7907 
7908   SDValue N1 = Op.getOperand(0);
7909   EVT SrcVT = N1.getValueType();
7910   unsigned SrcSize = SrcVT.getSizeInBits();
7911   if (SrcSize > 256 ||
7912       !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7913       !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7914     return SDValue();
7915   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7916     return SDValue();
7917 
7918   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7919   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7920 
7921   SDLoc DL(Op);
7922   SDValue Op1, Op2;
7923   if (SrcSize == 256) {
7924     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7925     EVT SplitVT =
7926         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7927     unsigned SplitNumElts = SplitVT.getVectorNumElements();
7928     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7929                       DAG.getConstant(0, DL, VecIdxTy));
7930     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7931                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7932   }
7933   else {
7934     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7935     Op2 = DAG.getUNDEF(WideVT);
7936   }
7937 
7938   // First list the elements we want to keep.
7939   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7940   SmallVector<int, 16> ShuffV;
7941   if (Subtarget.isLittleEndian())
7942     for (unsigned i = 0; i < TrgNumElts; ++i)
7943       ShuffV.push_back(i * SizeMult);
7944   else
7945     for (unsigned i = 1; i <= TrgNumElts; ++i)
7946       ShuffV.push_back(i * SizeMult - 1);
7947 
7948   // Populate the remaining elements with undefs.
7949   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7950     // ShuffV.push_back(i + WideNumElts);
7951     ShuffV.push_back(WideNumElts + 1);
7952 
7953   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7954   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7955   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7956 }
7957 
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
///
/// fsel selects on (operand >= 0), so each supported condition code is
/// rewritten in terms of the sign of LHS (when RHS is +/-0.0) or of
/// LHS - RHS / RHS - LHS otherwise. Unsupported cases return Op unchanged so
/// default expansion handles them.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP, or using SPE? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
    return Op;

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);
  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
  // presence of infinities.
  // This applies when the select implements min/max directly, i.e. the
  // compared operands are also the selected values.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      // NE is EQ with the select arms swapped.
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // EQ needs two fsels: (LHS >= 0) and (-LHS >= 0) must both pick TV.
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      // LE on LHS is GE on -LHS.
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  // General case: select on the sign of LHS - RHS (or RHS - LHS).
  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    // EQ needs two fsels: (Cmp >= 0) and (-Cmp >= 0) must both pick TV.
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
8078 
8079 static unsigned getPPCStrictOpcode(unsigned Opc) {
8080   switch (Opc) {
8081   default:
8082     llvm_unreachable("No strict version of this opcode!");
8083   case PPCISD::FCTIDZ:
8084     return PPCISD::STRICT_FCTIDZ;
8085   case PPCISD::FCTIWZ:
8086     return PPCISD::STRICT_FCTIWZ;
8087   case PPCISD::FCTIDUZ:
8088     return PPCISD::STRICT_FCTIDUZ;
8089   case PPCISD::FCTIWUZ:
8090     return PPCISD::STRICT_FCTIWUZ;
8091   case PPCISD::FCFID:
8092     return PPCISD::STRICT_FCFID;
8093   case PPCISD::FCFIDU:
8094     return PPCISD::STRICT_FCFIDU;
8095   case PPCISD::FCFIDS:
8096     return PPCISD::STRICT_FCFIDS;
8097   case PPCISD::FCFIDUS:
8098     return PPCISD::STRICT_FCFIDUS;
8099   }
8100 }
8101 
8102 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8103                               const PPCSubtarget &Subtarget) {
8104   SDLoc dl(Op);
8105   bool IsStrict = Op->isStrictFPOpcode();
8106   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8107                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8108 
8109   // TODO: Any other flags to propagate?
8110   SDNodeFlags Flags;
8111   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8112 
8113   // For strict nodes, source is the second operand.
8114   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8115   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8116   assert(Src.getValueType().isFloatingPoint());
8117   if (Src.getValueType() == MVT::f32) {
8118     if (IsStrict) {
8119       Src =
8120           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8121                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8122       Chain = Src.getValue(1);
8123     } else
8124       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8125   }
8126   SDValue Conv;
8127   unsigned Opc = ISD::DELETED_NODE;
8128   switch (Op.getSimpleValueType().SimpleTy) {
8129   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8130   case MVT::i32:
8131     Opc = IsSigned ? PPCISD::FCTIWZ
8132                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8133     break;
8134   case MVT::i64:
8135     assert((IsSigned || Subtarget.hasFPCVT()) &&
8136            "i64 FP_TO_UINT is supported only with FPCVT");
8137     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8138   }
8139   if (IsStrict) {
8140     Opc = getPPCStrictOpcode(Opc);
8141     Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8142                        {Chain, Src}, Flags);
8143   } else {
8144     Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8145   }
8146   return Conv;
8147 }
8148 
/// Lower an FP-to-integer conversion by converting in a floating-point
/// register and bouncing the result through a stack slot. Instead of emitting
/// the final integer load here, the load parameters (chain, pointer, pointer
/// info, alignment) are recorded in \p RLI so the caller can emit the load
/// and potentially fold or reuse it.
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  // stfiwx stores just the integer word of the FPR, so it is only usable when
  // the result is i32 and the conversion produced a correct 32-bit value
  // (signed, or unsigned with FPCVT).
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  // For strict nodes, chain the store after the conversion; otherwise the
  // store only depends on the entry node.
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    // stfiwx is modeled as a target memory intrinsic storing an i32.
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  // NOTE(review): the pointer is advanced unconditionally while the
  // MachinePointerInfo offset is endian-gated — presumably this f64-slot path
  // is only reached on big-endian subtargets (LE implies FPCVT/direct-move);
  // confirm before relying on it for little-endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  // Hand the load parameters back to the caller.
  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}
8192 
8193 /// Custom lowers floating point to integer conversions to use
8194 /// the direct move instructions available in ISA 2.07 to avoid the
8195 /// need for load/store combinations.
8196 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8197                                                     SelectionDAG &DAG,
8198                                                     const SDLoc &dl) const {
8199   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8200   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8201   if (Op->isStrictFPOpcode())
8202     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8203   else
8204     return Mov;
8205 }
8206 
/// Lower FP_TO_SINT/FP_TO_UINT (and their strict variants). f128 sources are
/// legal as-is; ppcf128 -> i32 is expanded by hand; otherwise the conversion
/// is done either via direct move (ISA 2.07+) or through a stack slot.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  // Strict nodes carry the chain as operand 0; the FP source follows it.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (SrcVT == MVT::f128)
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (SrcVT == MVT::ppcf128) {
    if (DstVT == MVT::i32) {
      // TODO: Conservatively pass only nofpexcept flag here. Need to check and
      // set other fast-math flags to FP operations in both strict and
      // non-strict cases. (FP_TO_SINT, FSUB)
      SDNodeFlags Flags;
      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

      if (IsSigned) {
        // Split the ppcf128 into its two f64 halves.
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode, and use
        // a smaller FP_TO_SINT.
        if (IsStrict) {
          SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                                    DAG.getVTList(MVT::f64, MVT::Other),
                                    {Op.getOperand(0), Lo, Hi}, Flags);
          return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                             DAG.getVTList(MVT::i32, MVT::Other),
                             {Res.getValue(1), Res}, Flags);
        } else {
          SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
          return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
        }
      } else {
        // Unsigned: rebase around 2^31 so the signed conversion can be used.
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
        SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
        if (IsStrict) {
          // Sel = Src < 0x80000000
          // FltOfs = select Sel, 0.0, 0x80000000
          // IntOfs = select Sel, 0, 0x80000000
          // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
          SDValue Chain = Op.getOperand(0);
          EVT SetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
          EVT DstSetCCVT =
              getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
          SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                                     SDNodeFlags(), Chain, true);
          Chain = Sel.getValue(1);

          SDValue FltOfs = DAG.getSelect(
              dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
          Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);

          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                                    DAG.getVTList(SrcVT, MVT::Other),
                                    {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(1);
          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                     DAG.getVTList(DstVT, MVT::Other),
                                     {Chain, Val}, Flags);
          Chain = SInt.getValue(1);
          SDValue IntOfs = DAG.getSelect(
              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
          return DAG.getMergeValues({Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
        }
      }
    }

    // Other ppcf128 destinations: let default legalization handle it.
    return SDValue();
  }

  // ISA 2.07 direct moves avoid the store/load round trip entirely.
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  // Otherwise bounce through a stack slot and load the integer back.
  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
8308 
8309 // We're trying to insert a regular store, S, and then a load, L. If the
8310 // incoming value, O, is a load, we might just be able to have our load use the
8311 // address used by O. However, we don't know if anything else will store to
8312 // that address before we can load from it. To prevent this situation, we need
8313 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8314 // the same chain operand as O, we create a token factor from the chain results
8315 // of O and L, and we replace all uses of O's chain result with that token
8316 // factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  // Conservatively skip reusing for constrained FP nodes.
  if (Op->isStrictFPOpcode())
    return false;

  SDLoc dl(Op);
  // FP_TO_UINT is only lowerable through a stack slot here with FPCVT, or
  // when the integer result type is i32.
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  // If Op is itself a lowerable fp-to-int conversion, materialize it through
  // the stack via LowerFP_TO_INTForReuse; RLI then describes that stack slot.
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  // Otherwise Op must be an ordinary (non-volatile, non-nontemporal) load of
  // the requested extension kind and memory type.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    // Fold the pre-increment offset into the pointer handed back to the
    // caller, so the new load addresses the same location.
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  // Copy the memory-operand details so the caller can emit an equivalent
  // load from the same address with the same attributes.
  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  // Record the original load's chain result (result 2 for pre-inc loads,
  // result 1 otherwise); it must later be merged with the new load's chain
  // via spliceIntoChain.
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
8363 
8364 // Given the head of the old chain, ResChain, insert a token factor containing
8365 // it and NewResChain, and make users of ResChain now be users of that token
8366 // factor.
8367 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8368 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8369                                         SDValue NewResChain,
8370                                         SelectionDAG &DAG) const {
8371   if (!ResChain)
8372     return;
8373 
8374   SDLoc dl(NewResChain);
8375 
8376   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8377                            NewResChain, DAG.getUNDEF(MVT::Other));
8378   assert(TF.getNode() != NewResChain.getNode() &&
8379          "A new TF really is required here");
8380 
8381   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8382   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8383 }
8384 
8385 /// Analyze profitability of direct move
8386 /// prefer float load to int load plus direct move
8387 /// when there is no integer use of int load
8388 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8389   SDNode *Origin = Op.getOperand(0).getNode();
8390   if (Origin->getOpcode() != ISD::LOAD)
8391     return true;
8392 
8393   // If there is no LXSIBZX/LXSIHZX, like Power8,
8394   // prefer direct move if the memory size is 1 or 2 bytes.
8395   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8396   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8397     return true;
8398 
8399   for (SDNode::use_iterator UI = Origin->use_begin(),
8400                             UE = Origin->use_end();
8401        UI != UE; ++UI) {
8402 
8403     // Only look at the users of the loaded value.
8404     if (UI.getUse().get().getResNo() != 0)
8405       continue;
8406 
8407     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8408         UI->getOpcode() != ISD::UINT_TO_FP &&
8409         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8410         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8411       return true;
8412   }
8413 
8414   return false;
8415 }
8416 
8417 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8418                               const PPCSubtarget &Subtarget,
8419                               SDValue Chain = SDValue()) {
8420   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8421                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8422   SDLoc dl(Op);
8423 
8424   // TODO: Any other flags to propagate?
8425   SDNodeFlags Flags;
8426   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8427 
8428   // If we have FCFIDS, then use it when converting to single-precision.
8429   // Otherwise, convert to double-precision and then round.
8430   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8431   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8432                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8433   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8434   if (Op->isStrictFPOpcode()) {
8435     if (!Chain)
8436       Chain = Op.getOperand(0);
8437     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8438                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8439   } else
8440     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8441 }
8442 
8443 /// Custom lowers integer to floating point conversions to use
8444 /// the direct move instructions available in ISA 2.07 to avoid the
8445 /// need for load/store combinations.
8446 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8447                                                     SelectionDAG &DAG,
8448                                                     const SDLoc &dl) const {
8449   assert((Op.getValueType() == MVT::f32 ||
8450           Op.getValueType() == MVT::f64) &&
8451          "Invalid floating point type as target of conversion");
8452   assert(Subtarget.hasFPCVT() &&
8453          "Int to FP conversions with direct moves require FPCVT");
8454   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8455   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8456   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8457                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8458   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8459   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8460   return convertIntToFP(Op, Mov, DAG, Subtarget);
8461 }
8462 
8463 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8464 
8465   EVT VecVT = Vec.getValueType();
8466   assert(VecVT.isVector() && "Expected a vector type.");
8467   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8468 
8469   EVT EltVT = VecVT.getVectorElementType();
8470   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8471   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8472 
8473   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8474   SmallVector<SDValue, 16> Ops(NumConcat);
8475   Ops[0] = Vec;
8476   SDValue UndefVec = DAG.getUNDEF(VecVT);
8477   for (unsigned i = 1; i < NumConcat; ++i)
8478     Ops[i] = UndefVec;
8479 
8480   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8481 }
8482 
/// Lower a vector int-to-fp conversion producing v2f64/v4f32: widen the
/// source to a full 128-bit register, shuffle each element into its target
/// lane, extend to the intermediate integer type (v4i32 or v2i64), then emit
/// the conversion on that intermediate vector.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  // Strict nodes carry the chain in operand 0; the source vector follows it.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  // Widen the (sub-128-bit) source to a full vector register.
  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  // The conversion consumes v4i32 (four f32 results) or v2i64 (two f64
  // results).
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // Start with a mask selecting only elements of the second shuffle operand
  // (index >= WideNumElts), which is all-zero for unsigned conversions and
  // undef for signed ones.
  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  // Stride is the number of narrow elements that make up one IntermediateVT
  // lane. Place each source element at the low end of its lane group on
  // little-endian, or the high end on big-endian, so that after the bitcast
  // it occupies the low-order bits of the wider lane.
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  // Unsigned: shuffle in zeros so each wide lane is already zero-extended.
  // Signed: the filler lanes are undef; SIGN_EXTEND_INREG below fixes them.
  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Src.getValueType();
    // NOTE(review): with P9 Altivec the extend-in-reg is tagged with the
    // wide element type rather than the original source type — presumably to
    // match the P9 vector-extend instructions; confirm against the ISel
    // patterns before relying on this.
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  // Strict nodes must return the chain alongside the converted value.
  if (IsStrict)
    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
8542 
/// Custom lowering of scalar (and dispatch of vector) int-to-fp conversions.
/// The scalar strategy is: get the integer's bit image into an FP register
/// (by direct move, by reusing an existing load, or through a stack slot)
/// and then convert it with the FCFID* family via convertIntToFP.
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  // Strict nodes carry the chain in operand 0 and the value in operand 1.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // Vector conversions take the widen/shuffle/convert path.
  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Op;

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  // An i1 converts exactly: select between 1.0 and 0.0 directly.
  if (Src.getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    // Bits holds the f64-typed bit image of the integer input.
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    // Preferred: re-load the value as f64 from the address of an existing
    // i64 load (or an fp-to-int stack slot) instead of a GPR->FPR transfer.
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      // A sign-extending i32 load feeding the conversion: re-load the i32
      // with LFIWAX instead.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      // Likewise for a zero-extending i32 load, via LFIWZX.
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      // The input is an extended i32 that is not itself a load: spill the
      // unextended i32 to a 4-byte stack slot and let LFIWAX/LFIWZX perform
      // the extension as part of the load.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      // Fallback: move the bits with a plain bitcast (no memory round-trip).
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    // Without FCFIDS the conversion produced f64; round it down to f32 here.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                         DAG.getVTList(MVT::f32, MVT::Other),
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    // With LFIWAX/LFIWZX the i32 can be loaded (with extension) directly
    // into an FP register; reuse an existing load's address if possible,
    // otherwise spill to a fresh 4-byte slot first.
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  // Without FCFIDS the conversion produced f64; round it down to f32 here.
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       DAG.getVTList(MVT::f32, MVT::Other),
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl));
  }
  return FP;
}
8795 
/// Lower FLT_ROUNDS_ by reading the FPSCR (via mffs), extracting the 2-bit
/// rounding-mode field, and remapping it to the C FLT_ROUNDS encoding.
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  // NOTE(review): byte offset 4 selects the low word of the stored f64 in a
  // big-endian layout — confirm this is intended for little-endian
  // subtargets as well.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
  Chain = CWD.getValue(1);

  // Transform as necessary: CWD1 = FPSCR & 3, CWD2 = ((FPSCR ^ 3) & 3) >> 1,
  // result = CWD1 ^ CWD2 (the remapping formula from the comment above).
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Resize the i32 result to the requested return type.
  RetVal =
      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  dl, VT, RetVal);

  // Return both the value and the updated chain.
  return DAG.getMergeValues({RetVal, Chain}, dl);
}
8859 
8860 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8861   EVT VT = Op.getValueType();
8862   unsigned BitWidth = VT.getSizeInBits();
8863   SDLoc dl(Op);
8864   assert(Op.getNumOperands() == 3 &&
8865          VT == Op.getOperand(1).getValueType() &&
8866          "Unexpected SHL!");
8867 
8868   // Expand into a bunch of logical ops.  Note that these ops
8869   // depend on the PPC behavior for oversized shift amounts.
8870   SDValue Lo = Op.getOperand(0);
8871   SDValue Hi = Op.getOperand(1);
8872   SDValue Amt = Op.getOperand(2);
8873   EVT AmtVT = Amt.getValueType();
8874 
8875   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8876                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8877   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8878   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8879   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8880   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8881                              DAG.getConstant(-BitWidth, dl, AmtVT));
8882   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8883   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8884   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8885   SDValue OutOps[] = { OutLo, OutHi };
8886   return DAG.getMergeValues(OutOps, dl);
8887 }
8888 
8889 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8890   EVT VT = Op.getValueType();
8891   SDLoc dl(Op);
8892   unsigned BitWidth = VT.getSizeInBits();
8893   assert(Op.getNumOperands() == 3 &&
8894          VT == Op.getOperand(1).getValueType() &&
8895          "Unexpected SRL!");
8896 
8897   // Expand into a bunch of logical ops.  Note that these ops
8898   // depend on the PPC behavior for oversized shift amounts.
8899   SDValue Lo = Op.getOperand(0);
8900   SDValue Hi = Op.getOperand(1);
8901   SDValue Amt = Op.getOperand(2);
8902   EVT AmtVT = Amt.getValueType();
8903 
8904   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8905                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8906   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8907   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8908   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8909   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8910                              DAG.getConstant(-BitWidth, dl, AmtVT));
8911   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8912   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8913   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8914   SDValue OutOps[] = { OutLo, OutHi };
8915   return DAG.getMergeValues(OutOps, dl);
8916 }
8917 
8918 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8919   SDLoc dl(Op);
8920   EVT VT = Op.getValueType();
8921   unsigned BitWidth = VT.getSizeInBits();
8922   assert(Op.getNumOperands() == 3 &&
8923          VT == Op.getOperand(1).getValueType() &&
8924          "Unexpected SRA!");
8925 
8926   // Expand into a bunch of logical ops, followed by a select_cc.
8927   SDValue Lo = Op.getOperand(0);
8928   SDValue Hi = Op.getOperand(1);
8929   SDValue Amt = Op.getOperand(2);
8930   EVT AmtVT = Amt.getValueType();
8931 
8932   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8933                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8934   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8935   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8936   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8937   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8938                              DAG.getConstant(-BitWidth, dl, AmtVT));
8939   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8940   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8941   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8942                                   Tmp4, Tmp6, ISD::SETLE);
8943   SDValue OutOps[] = { OutLo, OutHi };
8944   return DAG.getMergeValues(OutOps, dl);
8945 }
8946 
8947 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8948                                             SelectionDAG &DAG) const {
8949   SDLoc dl(Op);
8950   EVT VT = Op.getValueType();
8951   unsigned BitWidth = VT.getSizeInBits();
8952 
8953   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8954   SDValue X = Op.getOperand(0);
8955   SDValue Y = Op.getOperand(1);
8956   SDValue Z = Op.getOperand(2);
8957   EVT AmtVT = Z.getValueType();
8958 
8959   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8960   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8961   // This is simpler than TargetLowering::expandFunnelShift because we can rely
8962   // on PowerPC shift by BW being well defined.
8963   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8964                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
8965   SDValue SubZ =
8966       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8967   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8968   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8969   return DAG.getNode(ISD::OR, dl, VT, X, Y);
8970 }
8971 
8972 //===----------------------------------------------------------------------===//
8973 // Vector related lowering.
8974 //
8975 
8976 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8977 /// element size of SplatSize. Cast the result to VT.
8978 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8979                                       SelectionDAG &DAG, const SDLoc &dl) {
8980   static const MVT VTys[] = { // canonical VT to use for each size.
8981     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8982   };
8983 
8984   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8985 
8986   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8987   if (Val == ((1LU << (SplatSize * 8)) - 1)) {
8988     SplatSize = 1;
8989     Val = 0xFF;
8990   }
8991 
8992   EVT CanonicalVT = VTys[SplatSize-1];
8993 
8994   // Build a canonical splat for this value.
8995   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8996 }
8997 
8998 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8999 /// specified intrinsic ID.
9000 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9001                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9002   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9003   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9004                      DAG.getConstant(IID, dl, MVT::i32), Op);
9005 }
9006 
9007 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9008 /// specified intrinsic ID.
9009 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9010                                 SelectionDAG &DAG, const SDLoc &dl,
9011                                 EVT DestVT = MVT::Other) {
9012   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9013   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9014                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9015 }
9016 
9017 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9018 /// specified intrinsic ID.
9019 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9020                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9021                                 EVT DestVT = MVT::Other) {
9022   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9023   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9024                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9025 }
9026 
9027 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9028 /// amount.  The result has the specified value type.
9029 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9030                            SelectionDAG &DAG, const SDLoc &dl) {
9031   // Force LHS/RHS to be the right type.
9032   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9033   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9034 
9035   int Ops[16];
9036   for (unsigned i = 0; i != 16; ++i)
9037     Ops[i] = i + Amt;
9038   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9039   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9040 }
9041 
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
/// \param HasP8Vector - does this subtarget have the Power8 vector facility?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
9054 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9055                                             bool HasDirectMove,
9056                                             bool HasP8Vector) {
9057   EVT VecVT = V->getValueType(0);
9058   bool RightType = VecVT == MVT::v2f64 ||
9059     (HasP8Vector && VecVT == MVT::v4f32) ||
9060     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9061   if (!RightType)
9062     return false;
9063 
9064   bool IsSplat = true;
9065   bool IsLoad = false;
9066   SDValue Op0 = V->getOperand(0);
9067 
9068   // This function is called in a block that confirms the node is not a constant
9069   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9070   // different constants.
9071   if (V->isConstant())
9072     return false;
9073   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9074     if (V->getOperand(i).isUndef())
9075       return false;
9076     // We want to expand nodes that represent load-and-splat even if the
9077     // loaded value is a floating point truncation or conversion to int.
9078     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9079         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9080          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9081         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9082          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9083         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9084          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9085       IsLoad = true;
9086     // If the operands are different or the input is not a load and has more
9087     // uses than just this BV node, then it isn't a splat.
9088     if (V->getOperand(i) != Op0 ||
9089         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9090       IsSplat = false;
9091   }
9092   return !(IsSplat && IsLoad);
9093 }
9094 
9095 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9096 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9097 
9098   SDLoc dl(Op);
9099   SDValue Op0 = Op->getOperand(0);
9100 
9101   if ((Op.getValueType() != MVT::f128) ||
9102       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9103       (Op0.getOperand(0).getValueType() != MVT::i64) ||
9104       (Op0.getOperand(1).getValueType() != MVT::i64))
9105     return SDValue();
9106 
9107   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9108                      Op0.getOperand(1));
9109 }
9110 
9111 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9112   const SDValue *InputLoad = &Op;
9113   if (InputLoad->getOpcode() == ISD::BITCAST)
9114     InputLoad = &InputLoad->getOperand(0);
9115   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9116       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9117     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9118     InputLoad = &InputLoad->getOperand(0);
9119   }
9120   if (InputLoad->getOpcode() != ISD::LOAD)
9121     return nullptr;
9122   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9123   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9124 }
9125 
9126 // Convert the argument APFloat to a single precision APFloat if there is no
9127 // loss in information during the conversion to single precision APFloat and the
9128 // resulting number is not a denormal number. Return true if successful.
9129 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9130   APFloat APFloatToConvert = ArgAPFloat;
9131   bool LosesInfo = true;
9132   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9133                            &LosesInfo);
9134   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9135   if (Success)
9136     ArgAPFloat = APFloatToConvert;
9137   return Success;
9138 }
9139 
9140 // Bitcast the argument APInt to a double and convert it to a single precision
9141 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9142 // argument if there is no loss in information during the conversion from
9143 // double to single precision APFloat and the resulting number is not a denormal
9144 // number. Return true if successful.
9145 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9146   double DpValue = ArgAPInt.bitsToDouble();
9147   APFloat APFloatDp(DpValue);
9148   bool Success = convertToNonDenormSingle(APFloatDp);
9149   if (Success)
9150     ArgAPInt = APFloatDp.bitcastToAPInt();
9151   return Success;
9152 }
9153 
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32 bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
      convertToNonDenormSingle(APSplatBits)) {
    SDValue SplatNode = DAG.getNode(
        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
    return DAG.getBitcast(Op.getValueType(), SplatNode);
  }

  // Not a constant splat of at most 32 bits: the only profitable lowerings
  // left are load-and-splat, or (with VSX) keeping the BUILD_VECTOR for the
  // .td-file patterns.
  if (!BVNIsConstantSplat || SplatBitSize > 32) {

    bool IsPermutedLoad = false;
    const SDValue *InputLoad =
        getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (InputLoad && DAG.isSplatValue(Op, true)) {
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // We have handling for 4 and 8 byte elements.
      unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value).
      if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
          ((Subtarget.hasVSX() && ElementSize == 64) ||
           (Subtarget.hasP9Vector() && ElementSize == 32))) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType()) // VT
        };
        return
          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
                                  DAG.getVTList(Op.getValueType(), MVT::Other),
                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
      }
    }

    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
    // lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  // From here on we have a constant splat of SplatBitSize (<= 32) bits.
  uint64_t SplatBits = APSplatBits.getZExtValue();
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
    return getCanonicalConstSplat((SplatBits |= SplatBits << 16), SplatSize * 2,
                                  Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  // Sign-extend the SplatBitSize-bit value to 32 bits first.
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
                    (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    // NOTE(review): this condition is byte-identical to the srl case above,
    // so this branch appears unreachable — confirm whether an arithmetic
    // shift pattern (e.g. (int)i >> TypeShiftAmt) was intended here.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  // No cheap sequence found; fall back to default expansion.
  return SDValue();
}
9389 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // Decode the table entry: a 4-bit operation number and two 13-bit operand
  // IDs (encoded shuffle masks — see PPCPerfectShuffle.h for the format).
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  // Operations the perfect-shuffle table can request.
  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // For OP_COPY the operand ID selects which input is already in the right
    // order: (1*9+2)*9+3 denotes the LHS and ((4*9+5)*9+6)*9+7 the RHS
    // (base-9 encodings of the mask — see PPCPerfectShuffle.h).
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize the two operands of this operation.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Build the v16i8 byte-shuffle mask corresponding to the requested op.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  // The vsldoi cases are emitted directly rather than via a mask.
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Apply the mask on v16i8 and cast back to the operands' type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
9466 
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2,  3,  4,  5,  6,  7,  8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for the source
    // element (8 for LE, 7 for BE) in the Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand
      // is undefined, in which case we always assume we're picking from the
      // 1st operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source into position first, then insert.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
9567 
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  // Pack the 8 half-word mask elements into one 32-bit word, one 4-bit
  // nibble each, most-significant nibble first.
  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Mask covering every nibble except the one under inspection.
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
9679 
9680 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9681 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9682 /// return the default SDValue.
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
                                              SelectionDAG &DAG) const {
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
  // to v16i8. Peek through the bitcasts to get the actual operands.
  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));

  auto ShuffleMask = SVN->getMask();
  SDValue VecShuffle(SVN, 0);
  SDLoc DL(SVN);

  // Check that we have a four byte shuffle.
  if (!isNByteElemShuffleMask(SVN, 4, 1))
    return SDValue();

  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
  // If the constant vector is on the LHS, commute the shuffle so the checks
  // below only need to consider the "RHS is the constant splat" form. Note
  // that ShuffleMask must be refreshed to the commuted node's mask.
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
    std::swap(LHS, RHS);
    VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
    ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
  }

  // Ensure that the RHS is a vector of constants.
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
  if (!BVN)
    return SDValue();

  // Check if RHS is a splat of 4-bytes (or smaller).
  APInt APSplatValue, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
  // The instruction splats a constant C into two words of the source vector
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
  // within each word are consecutive, so we only need to check the first byte.
  // The ">15" checks confirm the word comes from the RHS (the constant splat);
  // the "%4" checks confirm the word is not torn across a 4-byte boundary.
  // The endianness-dependent Index selects which doubleword halves the
  // instruction overwrites.
  SDValue Index;
  bool IsLE = Subtarget.isLittleEndian();
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
  else
    return SDValue();

  // If the splat is narrower than 32-bits, we need to get the 32-bit value
  // for XXSPLTI32DX by replicating the narrow splat until it fills 32 bits.
  unsigned SplatVal = APSplatValue.getZExtValue();
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= (SplatVal << SplatBitSize);

  // Build the XXSPLTI32DX node on the v2i64-typed LHS and bitcast the result
  // back to the canonical v16i8 shuffle type.
  SDValue SplatNode = DAG.getNode(
      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
}
9750 
9751 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9752 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9753 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9754 /// i.e (or (shl x, C1), (srl x, 128-C1)).
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
  assert(Op.getValueType() == MVT::v1i128 &&
         "Only set v1i128 as custom, other type shouldn't reach here!");
  SDLoc dl(Op);
  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
  // NOTE(review): this assumes N1 is a BUILD_VECTOR whose first operand is a
  // ConstantSDNode (i.e. the rotate amount is a constant splat);
  // getConstantOperandVal would assert otherwise — confirm that custom
  // lowering of ROTL(v1i128) is only reached with constant amounts.
  unsigned SHLAmt = N1.getConstantOperandVal(0);
  if (SHLAmt % 8 == 0) {
    // Whole-byte rotates are expressible as a byte shuffle: build the
    // identity mask 0..15 and rotate it left by SHLAmt/8 byte positions.
    SmallVector<int, 16> Mask(16, 0);
    std::iota(Mask.begin(), Mask.end(), 0);
    std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
    if (SDValue Shuffle =
            DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
                                 DAG.getUNDEF(MVT::v16i8), Mask))
      return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
  }
  // Otherwise lower to a scalar i128 rotate:
  //   (or (shl x, SHLAmt), (srl x, 128 - SHLAmt))
  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
                              DAG.getConstant(SHLAmt, dl, MVT::i32));
  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
                              DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
}
9780 
9781 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9782 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9783 /// return the code it can be lowered into.  Worst case, it can always be
9784 /// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);

  // Any nodes that were combined in the target-independent combiner prior
  // to vector legalization will not be sent to the target combine. Try to
  // combine it here.
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    // The combine produced a new shuffle; continue lowering with it.
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Op);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
  }
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  // Outputs of the various isXX*ShuffleMask predicates queried below.
  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  bool IsPermutedLoad = false;
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
      PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

    // The splat index for permuted loads will be in the left half of the vector
    // which is strictly wider than the loaded value by 8 bytes. So we need to
    // adjust the splat index to point to the correct address in memory.
    if (IsPermutedLoad) {
      assert(isLittleEndian && "Unexpected permuted load on big endian target");
      SplatIdx += IsFourByte ? 2 : 1;
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
             "Splat of a value outside of the loaded memory");
    }

    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      // Compute the byte offset of the splatted element from the load's base
      // address; for LE the element order within the vector is reversed.
      uint64_t Offset = 0;
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
        DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
      SDValue LdSplt =
        DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
                                Ops, LD->getMemoryVT(), LD->getMemOperand());
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
      return LdSplt;
    }
  }
  // Word-insert patterns (xxinsertw, Power9): insert a word of V2 into V1,
  // optionally shifting V2 first.
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  // ISA 3.1 prefixed splat-immediate form.
  if (Subtarget.hasPrefixInstrs()) {
    SDValue SplatInsertNode;
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
      return SplatInsertNode;
  }

  // Power9 halfword/byte element inserts.
  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  // Double-vector word shift (xxsldwi).
  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  // Doubleword permute (xxpermdi).
  if (Subtarget.hasVSX() &&
    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  // Power9 byte-reverse patterns, lowered via BSWAP on the element type
  // whose elements are being reversed (halfword/word/doubleword/quadword).
  if (Subtarget.hasP9Vector()) {
     if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    // Word splat (xxspltw).
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  // PFIndexes[i] is the source word (0-7) of result word i, or 8 for undef.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      // All four bytes of a word must come from the same source word and be
      // in natural intra-word order for this to be a 4-byte-element shuffle.
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table (base-9 encoding of the
    // four word indexes, each in [0,8]).
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    // Undef mask entries (< 0) are mapped to element 0 arbitrarily.
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  ShufflesHandledWithVPERM++;
  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
  LLVM_DEBUG(SVOp->dump());
  LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
  LLVM_DEBUG(VPermMask.dump());

  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
10097 
10098 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10099 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10100 /// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  // CompareOpc is the numeric opcode value later emitted as an immediate
  // operand of the PPCISD::VCMP/VCMPo node; isDot is set for the predicate
  // ("_p") intrinsic variants, whose result is read back from CR6.
  // Cases guarded by a subtarget check return false when the required
  // feature (P8/P9 Altivec, VSX, ISA 3.1) is unavailable.
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // Quadword comparisons (non-predicate forms) require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq:
  case Intrinsic::ppc_altivec_vcmpgtsq:
  case Intrinsic::ppc_altivec_vcmpgtuq:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq:
      CompareOpc = 647;
      break;
    }
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  // Quadword predicate comparisons require ISA 3.1.
  case Intrinsic::ppc_altivec_vcmpequq_p:
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
    if (!Subtarget.isISA3_1())
      return false;
    switch (IntrinsicID) {
    default:
      llvm_unreachable("Unknown comparison intrinsic.");
    case Intrinsic::ppc_altivec_vcmpequq_p:
      CompareOpc = 455;
      break;
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
      CompareOpc = 903;
      break;
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      CompareOpc = 647;
      break;
    }
    isDot = true;
    break;
  }
  return true;
}
10384 
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
///
/// Handles llvm.thread.pointer and the AltiVec/VSX vector-compare intrinsics
/// (both the plain and the "dot"/predicate forms recognized by
/// getVectorCompareInfo); all other intrinsics are left to the default
/// lowering.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Operand 0 is the intrinsic id; the real arguments follow it.
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  SDLoc dl(Op);

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  // The result has the type of the vector operands.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // This is a dot ("predicate") form: operand 1 selects which CR6 bit (and
  // polarity) to return, operands 2 and 3 are the vectors to compare.
  // Create the PPCISD altivec 'dot' comparison node, which also sets CR6 and
  // produces glue so the CR read below stays attached to it.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  // In the MFOCRF result, CR6's four bits sit at positions 7..4 from the LSB
  // (LT at bit 7, EQ at bit 5), so the selected bit is 8 - (3 - BitNo) above
  // the LSB.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
10463 
10464 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10465                                                SelectionDAG &DAG) const {
10466   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10467   // the beginning of the argument list.
10468   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10469   SDLoc DL(Op);
10470   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10471   case Intrinsic::ppc_cfence: {
10472     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10473     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10474     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10475                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10476                                                   Op.getOperand(ArgStart + 1)),
10477                                       Op.getOperand(0)),
10478                    0);
10479   }
10480   default:
10481     break;
10482   }
10483   return SDValue();
10484 }
10485 
10486 // Lower scalar BSWAP64 to xxbrd.
10487 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10488   SDLoc dl(Op);
10489   // MTVSRDD
10490   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10491                    Op.getOperand(0));
10492   // XXBRD
10493   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10494   // MFVSRD
10495   int VectorIndex = 0;
10496   if (Subtarget.isLittleEndian())
10497     VectorIndex = 1;
10498   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10499                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10500   return Op;
10501 }
10502 
10503 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10504 // compared to a value that is atomically loaded (atomic loads zero-extend).
10505 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10506                                                 SelectionDAG &DAG) const {
10507   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10508          "Expecting an atomic compare-and-swap here.");
10509   SDLoc dl(Op);
10510   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10511   EVT MemVT = AtomicNode->getMemoryVT();
10512   if (MemVT.getSizeInBits() >= 32)
10513     return Op;
10514 
10515   SDValue CmpOp = Op.getOperand(2);
10516   // If this is already correctly zero-extended, leave it alone.
10517   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10518   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10519     return Op;
10520 
10521   // Clear the high bits of the compare operand.
10522   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10523   SDValue NewCmpOp =
10524     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10525                 DAG.getConstant(MaskVal, dl, MVT::i32));
10526 
10527   // Replace the existing compare operand with the properly zero-extended one.
10528   SmallVector<SDValue, 4> Ops;
10529   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10530     Ops.push_back(AtomicNode->getOperand(i));
10531   Ops[2] = NewCmpOp;
10532   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10533   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10534   auto NodeTy =
10535     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10536   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10537 }
10538 
10539 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10540                                                  SelectionDAG &DAG) const {
10541   SDLoc dl(Op);
10542   // Create a stack slot that is 16-byte aligned.
10543   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10544   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10545   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10546   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10547 
10548   // Store the input value into Value#0 of the stack slot.
10549   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10550                                MachinePointerInfo());
10551   // Load it out.
10552   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10553 }
10554 
10555 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10556                                                   SelectionDAG &DAG) const {
10557   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10558          "Should only be called for ISD::INSERT_VECTOR_ELT");
10559 
10560   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10561   // We have legal lowering for constant indices but not for variable ones.
10562   if (!C)
10563     return SDValue();
10564 
10565   EVT VT = Op.getValueType();
10566   SDLoc dl(Op);
10567   SDValue V1 = Op.getOperand(0);
10568   SDValue V2 = Op.getOperand(1);
10569   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10570   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10571     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10572     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10573     unsigned InsertAtElement = C->getZExtValue();
10574     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10575     if (Subtarget.isLittleEndian()) {
10576       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10577     }
10578     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10579                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10580   }
10581   return Op;
10582 }
10583 
// Lower vector multiplies that have no single AltiVec instruction (v4i32 and
// v16i8) into sequences of widening multiplies, shifts, and shuffles.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    // (vrlw/vslw only consume the low 5 bits of each element, so the -16
    // splat acts as a rotate/shift by 16.)
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // Multiply-sum LHS against the halfword-swapped RHS (with a zero
    // accumulator) to form the cross products that belong in the high half.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    // The mask picks the low byte of each 16-bit product from the two inputs.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
10647 
10648 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10649 
10650   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10651 
10652   EVT VT = Op.getValueType();
10653   assert(VT.isVector() &&
10654          "Only set vector abs as custom, scalar abs shouldn't reach here!");
10655   assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10656           VT == MVT::v16i8) &&
10657          "Unexpected vector element type!");
10658   assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10659          "Current subtarget doesn't support smax v2i64!");
10660 
10661   // For vector abs, it can be lowered to:
10662   // abs x
10663   // ==>
10664   // y = -x
10665   // smax(x, y)
10666 
10667   SDLoc dl(Op);
10668   SDValue X = Op.getOperand(0);
10669   SDValue Zero = DAG.getConstant(0, dl, VT);
10670   SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10671 
10672   // SMAX patch https://reviews.llvm.org/D47332
10673   // hasn't landed yet, so use intrinsic first here.
10674   // TODO: Should use SMAX directly once SMAX patch landed
10675   Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10676   if (VT == MVT::v2i64)
10677     BifID = Intrinsic::ppc_altivec_vmaxsd;
10678   else if (VT == MVT::v8i16)
10679     BifID = Intrinsic::ppc_altivec_vmaxsh;
10680   else if (VT == MVT::v16i8)
10681     BifID = Intrinsic::ppc_altivec_vmaxsb;
10682 
10683   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10684 }
10685 
10686 // Custom lowering for fpext vf32 to v2f64
10687 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10688 
10689   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10690          "Should only be called for ISD::FP_EXTEND");
10691 
10692   // FIXME: handle extends from half precision float vectors on P9.
10693   // We only want to custom lower an extend from v2f32 to v2f64.
10694   if (Op.getValueType() != MVT::v2f64 ||
10695       Op.getOperand(0).getValueType() != MVT::v2f32)
10696     return SDValue();
10697 
10698   SDLoc dl(Op);
10699   SDValue Op0 = Op.getOperand(0);
10700 
10701   switch (Op0.getOpcode()) {
10702   default:
10703     return SDValue();
10704   case ISD::EXTRACT_SUBVECTOR: {
10705     assert(Op0.getNumOperands() == 2 &&
10706            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10707            "Node should have 2 operands with second one being a constant!");
10708 
10709     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10710       return SDValue();
10711 
10712     // Custom lower is only done for high or low doubleword.
10713     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10714     if (Idx % 2 != 0)
10715       return SDValue();
10716 
10717     // Since input is v4f32, at this point Idx is either 0 or 2.
10718     // Shift to get the doubleword position we want.
10719     int DWord = Idx >> 1;
10720 
10721     // High and low word positions are different on little endian.
10722     if (Subtarget.isLittleEndian())
10723       DWord ^= 0x1;
10724 
10725     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10726                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10727   }
10728   case ISD::FADD:
10729   case ISD::FMUL:
10730   case ISD::FSUB: {
10731     SDValue NewLoad[2];
10732     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10733       // Ensure both input are loads.
10734       SDValue LdOp = Op0.getOperand(i);
10735       if (LdOp.getOpcode() != ISD::LOAD)
10736         return SDValue();
10737       // Generate new load node.
10738       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10739       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10740       NewLoad[i] = DAG.getMemIntrinsicNode(
10741           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10742           LD->getMemoryVT(), LD->getMemOperand());
10743     }
10744     SDValue NewOp =
10745         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10746                     NewLoad[1], Op0.getNode()->getFlags());
10747     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10748                        DAG.getConstant(0, dl, MVT::i32));
10749   }
10750   case ISD::LOAD: {
10751     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10752     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10753     SDValue NewLd = DAG.getMemIntrinsicNode(
10754         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10755         LD->getMemoryVT(), LD->getMemOperand());
10756     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10757                        DAG.getConstant(0, dl, MVT::i32));
10758   }
10759   }
10760   llvm_unreachable("ERROR:Should return for all cases within swtich.");
10761 }
10762 
10763 /// LowerOperation - Provide custom lowering hooks for some operations.
10764 ///
10765 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10766   switch (Op.getOpcode()) {
10767   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10768   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10769   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10770   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10771   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10772   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10773   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10774   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10775   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10776 
10777   // Variable argument lowering.
10778   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10779   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10780   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10781 
10782   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
10783   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10784   case ISD::GET_DYNAMIC_AREA_OFFSET:
10785     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10786 
10787   // Exception handling lowering.
10788   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
10789   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
10790   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
10791 
10792   case ISD::LOAD:               return LowerLOAD(Op, DAG);
10793   case ISD::STORE:              return LowerSTORE(Op, DAG);
10794   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
10795   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
10796   case ISD::STRICT_FP_TO_UINT:
10797   case ISD::STRICT_FP_TO_SINT:
10798   case ISD::FP_TO_UINT:
10799   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10800   case ISD::STRICT_UINT_TO_FP:
10801   case ISD::STRICT_SINT_TO_FP:
10802   case ISD::UINT_TO_FP:
10803   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
10804   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
10805 
10806   // Lower 64-bit shifts.
10807   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
10808   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
10809   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
10810 
10811   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
10812   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
10813 
10814   // Vector-related lowering.
10815   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
10816   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
10817   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10818   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
10819   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
10820   case ISD::MUL:                return LowerMUL(Op, DAG);
10821   case ISD::ABS:                return LowerABS(Op, DAG);
10822   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
10823   case ISD::ROTL:               return LowerROTL(Op, DAG);
10824 
10825   // For counter-based loop handling.
10826   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
10827 
10828   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
10829 
10830   // Frame & Return address.
10831   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
10832   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
10833 
10834   case ISD::INTRINSIC_VOID:
10835     return LowerINTRINSIC_VOID(Op, DAG);
10836   case ISD::BSWAP:
10837     return LowerBSWAP(Op, DAG);
10838   case ISD::ATOMIC_CMP_SWAP:
10839     return LowerATOMIC_CMP_SWAP(Op, DAG);
10840   }
10841 }
10842 
10843 void PPCTargetLowering::LowerOperationWrapper(SDNode *N,
10844                                               SmallVectorImpl<SDValue> &Results,
10845                                               SelectionDAG &DAG) const {
10846   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
10847 
10848   if (!Res.getNode())
10849     return;
10850 
10851   // Take the return value as-is if original node has only one result.
10852   if (N->getNumValues() == 1) {
10853     Results.push_back(Res);
10854     return;
10855   }
10856 
10857   // New node should have the same number of results.
10858   assert((N->getNumValues() == Res->getNumValues()) &&
10859       "Lowering returned the wrong number of results!");
10860 
10861   for (unsigned i = 0; i < N->getNumValues(); ++i)
10862     Results.push_back(Res.getValue(i));
10863 }
10864 
/// Custom type-legalize the results of node N, appending the replacement
/// values to Results.  Returning with Results empty tells the legalizer to
/// fall back to its default handling.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    // Read the time base as two i32 halves plus a chain, then glue the halves
    // back together into the i64 result the caller expects.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // Only llvm.loop.decrement needs help: re-issue it with the legal setcc
    // result type and truncate back to the original i1.
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Custom handling is only needed for i64 va_arg on 32-bit SVR4.
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      // NOTE(review): this lowers result #1 of N rather than result #0 —
      // presumably so LowerVAARG sees the chain-bearing value; confirm
      // against LowerVAARG before changing.
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
        MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    // Only vector truncates are custom-handled here; scalars use the default.
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}
10940 
10941 //===----------------------------------------------------------------------===//
10942 //  Other Lowering Code
10943 //===----------------------------------------------------------------------===//
10944 
10945 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10946   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10947   Function *Func = Intrinsic::getDeclaration(M, Id);
10948   return Builder.CreateCall(Func, {});
10949 }
10950 
10951 // The mappings for emitLeading/TrailingFence is taken from
10952 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10953 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10954                                                  Instruction *Inst,
10955                                                  AtomicOrdering Ord) const {
10956   if (Ord == AtomicOrdering::SequentiallyConsistent)
10957     return callIntrinsic(Builder, Intrinsic::ppc_sync);
10958   if (isReleaseOrStronger(Ord))
10959     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10960   return nullptr;
10961 }
10962 
10963 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10964                                                   Instruction *Inst,
10965                                                   AtomicOrdering Ord) const {
10966   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10967     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10968     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10969     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10970     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10971       return Builder.CreateCall(
10972           Intrinsic::getDeclaration(
10973               Builder.GetInsertBlock()->getParent()->getParent(),
10974               Intrinsic::ppc_cfence, {Inst->getType()}),
10975           {Inst});
10976     // FIXME: Can use isync for rmw operation.
10977     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10978   }
10979   return nullptr;
10980 }
10981 
10982 MachineBasicBlock *
10983 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10984                                     unsigned AtomicSize,
10985                                     unsigned BinOpcode,
10986                                     unsigned CmpOpcode,
10987                                     unsigned CmpPred) const {
10988   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10989   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10990 
10991   auto LoadMnemonic = PPC::LDARX;
10992   auto StoreMnemonic = PPC::STDCX;
10993   switch (AtomicSize) {
10994   default:
10995     llvm_unreachable("Unexpected size of atomic entity");
10996   case 1:
10997     LoadMnemonic = PPC::LBARX;
10998     StoreMnemonic = PPC::STBCX;
10999     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11000     break;
11001   case 2:
11002     LoadMnemonic = PPC::LHARX;
11003     StoreMnemonic = PPC::STHCX;
11004     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11005     break;
11006   case 4:
11007     LoadMnemonic = PPC::LWARX;
11008     StoreMnemonic = PPC::STWCX;
11009     break;
11010   case 8:
11011     LoadMnemonic = PPC::LDARX;
11012     StoreMnemonic = PPC::STDCX;
11013     break;
11014   }
11015 
11016   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11017   MachineFunction *F = BB->getParent();
11018   MachineFunction::iterator It = ++BB->getIterator();
11019 
11020   Register dest = MI.getOperand(0).getReg();
11021   Register ptrA = MI.getOperand(1).getReg();
11022   Register ptrB = MI.getOperand(2).getReg();
11023   Register incr = MI.getOperand(3).getReg();
11024   DebugLoc dl = MI.getDebugLoc();
11025 
11026   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11027   MachineBasicBlock *loop2MBB =
11028     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11029   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11030   F->insert(It, loopMBB);
11031   if (CmpOpcode)
11032     F->insert(It, loop2MBB);
11033   F->insert(It, exitMBB);
11034   exitMBB->splice(exitMBB->begin(), BB,
11035                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11036   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11037 
11038   MachineRegisterInfo &RegInfo = F->getRegInfo();
11039   Register TmpReg = (!BinOpcode) ? incr :
11040     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11041                                            : &PPC::GPRCRegClass);
11042 
11043   //  thisMBB:
11044   //   ...
11045   //   fallthrough --> loopMBB
11046   BB->addSuccessor(loopMBB);
11047 
11048   //  loopMBB:
11049   //   l[wd]arx dest, ptr
11050   //   add r0, dest, incr
11051   //   st[wd]cx. r0, ptr
11052   //   bne- loopMBB
11053   //   fallthrough --> exitMBB
11054 
11055   // For max/min...
11056   //  loopMBB:
11057   //   l[wd]arx dest, ptr
11058   //   cmpl?[wd] incr, dest
11059   //   bgt exitMBB
11060   //  loop2MBB:
11061   //   st[wd]cx. dest, ptr
11062   //   bne- loopMBB
11063   //   fallthrough --> exitMBB
11064 
11065   BB = loopMBB;
11066   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11067     .addReg(ptrA).addReg(ptrB);
11068   if (BinOpcode)
11069     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11070   if (CmpOpcode) {
11071     // Signed comparisons of byte or halfword values must be sign-extended.
11072     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11073       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11074       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11075               ExtReg).addReg(dest);
11076       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11077         .addReg(incr).addReg(ExtReg);
11078     } else
11079       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11080         .addReg(incr).addReg(dest);
11081 
11082     BuildMI(BB, dl, TII->get(PPC::BCC))
11083       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11084     BB->addSuccessor(loop2MBB);
11085     BB->addSuccessor(exitMBB);
11086     BB = loop2MBB;
11087   }
11088   BuildMI(BB, dl, TII->get(StoreMnemonic))
11089     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11090   BuildMI(BB, dl, TII->get(PPC::BCC))
11091     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11092   BB->addSuccessor(loopMBB);
11093   BB->addSuccessor(exitMBB);
11094 
11095   //  exitMBB:
11096   //   ...
11097   BB = exitMBB;
11098   return BB;
11099 }
11100 
// Expand an 8- or 16-bit atomic read-modify-write pseudo for subtargets that
// lack native part-word atomic instructions.  The narrow value is operated on
// inside its naturally aligned 32-bit word using an lwarx/stwcx. retry loop
// plus shift/mask bookkeeping.  BinOpcode==0 indicates ATOMIC_SWAP (no
// arithmetic); a nonzero CmpOpcode/CmpPred requests a compare-and-maybe-skip
// form (used for operations such as atomic min/max) that exits the loop
// without storing when the predicate holds.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  // Pseudo operands: (dest, ptrA, ptrB, incr).
  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  // CFG: BB -> loopMBB -> [loop2MBB ->] exitMBB, with back-edges into
  // loopMBB from the stwcx. failure branch.  loop2MBB only exists for the
  // compare (CmpOpcode) variant.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  // On little-endian the byte offset already equals the shift amount, so no
  // extra XORI is needed and ShiftReg aliases Shift1Reg.
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  // For swap (no BinOpcode) the "result" of the binop is just the shifted
  // increment, so reuse Incr2Reg instead of creating a fresh register.
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  // Big-endian: convert the byte-within-word offset into a shift amount.
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  // Clear the low bits of the pointer to get the aligned word address.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  // Build the value mask (0xFF or 0xFFFF) and shift it into position.
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  // Apply the binary op to the shifted operands (skipped for swap).
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      // Signed compare: shift the loaded value back down and sign-extend it,
      // then compare against the original (unshifted) increment.
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    // If the predicate holds, skip the store and leave the loop.
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  // Merge the new part-word value into the untouched bytes and attempt the
  // conditional store; retry the whole loop if the reservation was lost.
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  // Extract the original (pre-op) part-word value as the result.
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
11293 
// Expand the EH_SjLj_SetJmp32/64 pseudo: materialize the setjmp side of the
// LLVM-internal (non-libc-compatible) sjlj exception buffer.  Saves the base
// pointer, TOC pointer (64-bit ELF), and the return address into the buffer,
// and produces 0 on the direct path and 1 on the longjmp-resume path via a
// PHI in the sink block.
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  // 64-bit ELF: spill the TOC pointer (X2) into the fourth buffer slot.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  // Value produced on the longjmp-resume path.
  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  // Read LR (set by the BCLalways above) to obtain the resume address.
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Value produced on the direct (non-longjmp) path.
  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
11435 
// Expand the EH_SjLj_LongJmp32/64 pseudo: reload FP, the saved return
// address (IP), SP, BP, and (on 64-bit SVR4) the TOC pointer from the sjlj
// buffer written by emitEHSjLjSetJmp, then jump to the saved IP via CTR.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  // Buffer slot offsets; must mirror the layout written by emitEHSjLjSetJmp
  // (slot 0 = frame address, 1 = IP, 2 = stack address, 3 = TOC, 4 = BP).
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Jump
  // Indirect branch through CTR to the saved resume address.
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}
11537 
11538 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11539   // If the function specifically requests inline stack probes, emit them.
11540   if (MF.getFunction().hasFnAttribute("probe-stack"))
11541     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11542            "inline-asm";
11543   return false;
11544 }
11545 
11546 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11547   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11548   unsigned StackAlign = TFI->getStackAlignment();
11549   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11550          "Unexpected stack alignment");
11551   // The default stack probe size is 4096 if the function has no
11552   // stack-probe-size attribute.
11553   unsigned StackProbeSize = 4096;
11554   const Function &Fn = MF.getFunction();
11555   if (Fn.hasFnAttribute("stack-probe-size"))
11556     Fn.getFnAttribute("stack-probe-size")
11557         .getValueAsString()
11558         .getAsInteger(0, StackProbeSize);
11559   // Round down to the stack alignment.
11560   StackProbeSize &= ~(StackAlign - 1);
11561   return StackProbeSize ? StackProbeSize : StackAlign;
11562 }
11563 
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
// Finally, it uses pseudo instruction DYNAREAOFFSET to get the future result
// of MaxCallFrameSize so that it can calculate correct data area pointer.
// Expand the probed-alloca pseudo: decrement SP toward the final stack
// pointer in ProbeSize steps, touching (storing to) each page-sized block so
// the OS guard page is hit in order, then compute the data-area pointer via
// the DYNAREAOFFSET pseudo.
MachineBasicBlock *
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(*MF);
  const BasicBlock *ProbedBB = MBB->getBasicBlock();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG of probing stack looks as
  //         +-----+
  //         | MBB |
  //         +--+--+
  //            |
  //       +----v----+
  //  +--->+ TestMBB +---+
  //  |    +----+----+   |
  //  |         |        |
  //  |   +-----v----+   |
  //  +---+ BlockMBB |   |
  //      +----------+   |
  //                     |
  //       +---------+   |
  //       | TailMBB +<--+
  //       +---------+
  // In MBB, calculate previous frame pointer and final stack pointer.
  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
  // TailMBB is spliced via \p MI.
  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);

  MachineFunction::iterator MBBIter = ++MBB->getIterator();
  MF->insert(MBBIter, TestMBB);
  MF->insert(MBBIter, BlockMBB);
  MF->insert(MBBIter, TailMBB);

  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  // Pseudo operands: (DstReg, NegSizeReg, op2, op3); op2/op3 are forwarded
  // verbatim to the PREPARE_PROBED_ALLOCA and DYNAREAOFFSET pseudos.
  Register DstReg = MI.getOperand(0).getReg();
  Register NegSizeReg = MI.getOperand(1).getReg();
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

  // Since value of NegSizeReg might be realigned in prologue/epilogue
  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get
  // actual FramePointer and NegSize.
  unsigned ProbeOpc;
  if (!MRI.hasOneNonDBGUse(NegSizeReg))
    ProbeOpc =
        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
  else
    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
    // and NegSizeReg will be allocated in the same phyreg to avoid
    // redundant copy when NegSizeReg has only one use which is current MI and
    // will be replaced by PREPARE_PROBED_ALLOCA then.
    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
      .addDef(ActualNegSizeReg)
      .addReg(NegSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));

  // Calculate final stack pointer, which equals to SP + ActualNegSize.
  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
          FinalStackPtr)
      .addReg(SPReg)
      .addReg(ActualNegSizeReg);

  // Materialize a scratch register for update.
  // The scratch register holds -ProbeSize: LI if it fits in 16 bits,
  // otherwise a LIS/ORI pair.
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  if (!isInt<16>(NegProbeSize)) {
    Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
        .addImm(NegProbeSize >> 16);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
            ScratchReg)
        .addReg(TempReg)
        .addImm(NegProbeSize & 0xFFFF);
  } else
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
        .addImm(NegProbeSize);

  {
    // Probing leading residual part.
    // Compute NegMod = ActualNegSize - (ActualNegSize / -ProbeSize) *
    // -ProbeSize, i.e. the remainder that is not a multiple of ProbeSize,
    // and probe it first with a single store-with-update.
    Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
        .addReg(ActualNegSizeReg)
        .addReg(ScratchReg);
    Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
        .addReg(Div)
        .addReg(ScratchReg);
    Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
        .addReg(Mul)
        .addReg(ActualNegSizeReg);
    // STDUX/STWUX stores FramePointer and updates SP by NegMod in one
    // instruction, keeping the backchain intact while touching the page.
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(NegMod);
  }

  {
    // Remaining part should be multiple of ProbeSize.
    // Loop header: exit once SP has reached the final stack pointer.
    Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
        .addReg(SPReg)
        .addReg(FinalStackPtr);
    BuildMI(TestMBB, DL, TII->get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CmpResult)
        .addMBB(TailMBB);
    TestMBB->addSuccessor(BlockMBB);
    TestMBB->addSuccessor(TailMBB);
  }

  {
    // Touch the block.
    // |P...|P...|P...
    // Advance SP by -ProbeSize while storing, then loop back to the test.
    BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(ScratchReg);
    BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
    BlockMBB->addSuccessor(TestMBB);
  }

  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue
  // insertion; use the DYNAREAOFFSET pseudo instruction to get the future
  // result.
  Register MaxCallFrameSizeReg =
      MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  BuildMI(TailMBB, DL,
          TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
          MaxCallFrameSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));
  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
      .addReg(SPReg)
      .addReg(MaxCallFrameSizeReg);

  // Splice instructions after MI to TailMBB.
  TailMBB->splice(TailMBB->end(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(TestMBB);

  // Delete the pseudo instruction.
  MI.eraseFromParent();

  ++NumDynamicAllocaProbed;
  return TailMBB;
}
11731 
11732 MachineBasicBlock *
11733 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11734                                                MachineBasicBlock *BB) const {
11735   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11736       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11737     if (Subtarget.is64BitELFABI() &&
11738         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11739         !Subtarget.isUsingPCRelativeCalls()) {
11740       // Call lowering should have added an r2 operand to indicate a dependence
11741       // on the TOC base pointer value. It can't however, because there is no
11742       // way to mark the dependence as implicit there, and so the stackmap code
11743       // will confuse it with a regular operand. Instead, add the dependence
11744       // here.
11745       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11746     }
11747 
11748     return emitPatchPoint(MI, BB);
11749   }
11750 
11751   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11752       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11753     return emitEHSjLjSetJmp(MI, BB);
11754   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11755              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11756     return emitEHSjLjLongJmp(MI, BB);
11757   }
11758 
11759   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11760 
11761   // To "insert" these instructions we actually have to insert their
11762   // control-flow patterns.
11763   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11764   MachineFunction::iterator It = ++BB->getIterator();
11765 
11766   MachineFunction *F = BB->getParent();
11767 
11768   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11769       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11770       MI.getOpcode() == PPC::SELECT_I8) {
11771     SmallVector<MachineOperand, 2> Cond;
11772     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11773         MI.getOpcode() == PPC::SELECT_CC_I8)
11774       Cond.push_back(MI.getOperand(4));
11775     else
11776       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11777     Cond.push_back(MI.getOperand(1));
11778 
11779     DebugLoc dl = MI.getDebugLoc();
11780     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11781                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11782   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11783              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11784              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11785              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11786              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11787              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11788              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11789              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11790              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11791              MI.getOpcode() == PPC::SELECT_F4 ||
11792              MI.getOpcode() == PPC::SELECT_F8 ||
11793              MI.getOpcode() == PPC::SELECT_F16 ||
11794              MI.getOpcode() == PPC::SELECT_SPE ||
11795              MI.getOpcode() == PPC::SELECT_SPE4 ||
11796              MI.getOpcode() == PPC::SELECT_VRRC ||
11797              MI.getOpcode() == PPC::SELECT_VSFRC ||
11798              MI.getOpcode() == PPC::SELECT_VSSRC ||
11799              MI.getOpcode() == PPC::SELECT_VSRC) {
11800     // The incoming instruction knows the destination vreg to set, the
11801     // condition code register to branch on, the true/false values to
11802     // select between, and a branch opcode to use.
11803 
11804     //  thisMBB:
11805     //  ...
11806     //   TrueVal = ...
11807     //   cmpTY ccX, r1, r2
11808     //   bCC copy1MBB
11809     //   fallthrough --> copy0MBB
11810     MachineBasicBlock *thisMBB = BB;
11811     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11812     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11813     DebugLoc dl = MI.getDebugLoc();
11814     F->insert(It, copy0MBB);
11815     F->insert(It, sinkMBB);
11816 
11817     // Transfer the remainder of BB and its successor edges to sinkMBB.
11818     sinkMBB->splice(sinkMBB->begin(), BB,
11819                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11820     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11821 
11822     // Next, add the true and fallthrough blocks as its successors.
11823     BB->addSuccessor(copy0MBB);
11824     BB->addSuccessor(sinkMBB);
11825 
11826     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11827         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11828         MI.getOpcode() == PPC::SELECT_F16 ||
11829         MI.getOpcode() == PPC::SELECT_SPE4 ||
11830         MI.getOpcode() == PPC::SELECT_SPE ||
11831         MI.getOpcode() == PPC::SELECT_VRRC ||
11832         MI.getOpcode() == PPC::SELECT_VSFRC ||
11833         MI.getOpcode() == PPC::SELECT_VSSRC ||
11834         MI.getOpcode() == PPC::SELECT_VSRC) {
11835       BuildMI(BB, dl, TII->get(PPC::BC))
11836           .addReg(MI.getOperand(1).getReg())
11837           .addMBB(sinkMBB);
11838     } else {
11839       unsigned SelectPred = MI.getOperand(4).getImm();
11840       BuildMI(BB, dl, TII->get(PPC::BCC))
11841           .addImm(SelectPred)
11842           .addReg(MI.getOperand(1).getReg())
11843           .addMBB(sinkMBB);
11844     }
11845 
11846     //  copy0MBB:
11847     //   %FalseValue = ...
11848     //   # fallthrough to sinkMBB
11849     BB = copy0MBB;
11850 
11851     // Update machine-CFG edges
11852     BB->addSuccessor(sinkMBB);
11853 
11854     //  sinkMBB:
11855     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11856     //  ...
11857     BB = sinkMBB;
11858     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11859         .addReg(MI.getOperand(3).getReg())
11860         .addMBB(copy0MBB)
11861         .addReg(MI.getOperand(2).getReg())
11862         .addMBB(thisMBB);
11863   } else if (MI.getOpcode() == PPC::ReadTB) {
11864     // To read the 64-bit time-base register on a 32-bit target, we read the
11865     // two halves. Should the counter have wrapped while it was being read, we
11866     // need to try again.
11867     // ...
11868     // readLoop:
11869     // mfspr Rx,TBU # load from TBU
11870     // mfspr Ry,TB  # load from TB
11871     // mfspr Rz,TBU # load from TBU
11872     // cmpw crX,Rx,Rz # check if 'old'='new'
11873     // bne readLoop   # branch if they're not equal
11874     // ...
11875 
11876     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11877     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11878     DebugLoc dl = MI.getDebugLoc();
11879     F->insert(It, readMBB);
11880     F->insert(It, sinkMBB);
11881 
11882     // Transfer the remainder of BB and its successor edges to sinkMBB.
11883     sinkMBB->splice(sinkMBB->begin(), BB,
11884                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11885     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11886 
11887     BB->addSuccessor(readMBB);
11888     BB = readMBB;
11889 
11890     MachineRegisterInfo &RegInfo = F->getRegInfo();
11891     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11892     Register LoReg = MI.getOperand(0).getReg();
11893     Register HiReg = MI.getOperand(1).getReg();
11894 
11895     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11896     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11897     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11898 
11899     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11900 
11901     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11902         .addReg(HiReg)
11903         .addReg(ReadAgainReg);
11904     BuildMI(BB, dl, TII->get(PPC::BCC))
11905         .addImm(PPC::PRED_NE)
11906         .addReg(CmpReg)
11907         .addMBB(readMBB);
11908 
11909     BB->addSuccessor(readMBB);
11910     BB->addSuccessor(sinkMBB);
11911   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11912     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11913   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11914     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11915   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11916     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11917   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11918     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11919 
11920   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11921     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11922   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11923     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11924   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11925     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11926   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11927     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11928 
11929   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11930     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11931   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11932     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11933   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11934     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11935   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11936     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11937 
11938   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11939     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11940   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11941     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11942   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11943     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11944   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11945     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11946 
11947   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11948     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11949   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11950     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11951   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11952     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11953   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11954     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11955 
11956   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11957     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11958   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11959     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11960   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11961     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11962   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11963     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11964 
11965   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11966     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11967   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11968     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11969   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11970     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11971   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11972     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11973 
11974   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11975     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11976   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11977     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11978   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11979     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11980   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11981     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11982 
11983   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11984     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11985   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11986     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11987   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11988     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11989   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11990     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11991 
11992   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11993     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11994   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11995     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11996   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11997     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11998   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11999     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12000 
12001   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12002     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12003   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12004     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12005   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12006     BB = EmitAtomicBinary(MI, BB, 4, 0);
12007   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12008     BB = EmitAtomicBinary(MI, BB, 8, 0);
12009   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12010            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12011            (Subtarget.hasPartwordAtomics() &&
12012             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12013            (Subtarget.hasPartwordAtomics() &&
12014             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12015     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12016 
12017     auto LoadMnemonic = PPC::LDARX;
12018     auto StoreMnemonic = PPC::STDCX;
12019     switch (MI.getOpcode()) {
12020     default:
12021       llvm_unreachable("Compare and swap of unknown size");
12022     case PPC::ATOMIC_CMP_SWAP_I8:
12023       LoadMnemonic = PPC::LBARX;
12024       StoreMnemonic = PPC::STBCX;
12025       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12026       break;
12027     case PPC::ATOMIC_CMP_SWAP_I16:
12028       LoadMnemonic = PPC::LHARX;
12029       StoreMnemonic = PPC::STHCX;
12030       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12031       break;
12032     case PPC::ATOMIC_CMP_SWAP_I32:
12033       LoadMnemonic = PPC::LWARX;
12034       StoreMnemonic = PPC::STWCX;
12035       break;
12036     case PPC::ATOMIC_CMP_SWAP_I64:
12037       LoadMnemonic = PPC::LDARX;
12038       StoreMnemonic = PPC::STDCX;
12039       break;
12040     }
12041     Register dest = MI.getOperand(0).getReg();
12042     Register ptrA = MI.getOperand(1).getReg();
12043     Register ptrB = MI.getOperand(2).getReg();
12044     Register oldval = MI.getOperand(3).getReg();
12045     Register newval = MI.getOperand(4).getReg();
12046     DebugLoc dl = MI.getDebugLoc();
12047 
12048     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12049     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12050     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12051     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12052     F->insert(It, loop1MBB);
12053     F->insert(It, loop2MBB);
12054     F->insert(It, midMBB);
12055     F->insert(It, exitMBB);
12056     exitMBB->splice(exitMBB->begin(), BB,
12057                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12058     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12059 
12060     //  thisMBB:
12061     //   ...
12062     //   fallthrough --> loopMBB
12063     BB->addSuccessor(loop1MBB);
12064 
12065     // loop1MBB:
12066     //   l[bhwd]arx dest, ptr
12067     //   cmp[wd] dest, oldval
12068     //   bne- midMBB
12069     // loop2MBB:
12070     //   st[bhwd]cx. newval, ptr
12071     //   bne- loopMBB
12072     //   b exitBB
12073     // midMBB:
12074     //   st[bhwd]cx. dest, ptr
12075     // exitBB:
12076     BB = loop1MBB;
12077     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12078     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12079         .addReg(oldval)
12080         .addReg(dest);
12081     BuildMI(BB, dl, TII->get(PPC::BCC))
12082         .addImm(PPC::PRED_NE)
12083         .addReg(PPC::CR0)
12084         .addMBB(midMBB);
12085     BB->addSuccessor(loop2MBB);
12086     BB->addSuccessor(midMBB);
12087 
12088     BB = loop2MBB;
12089     BuildMI(BB, dl, TII->get(StoreMnemonic))
12090         .addReg(newval)
12091         .addReg(ptrA)
12092         .addReg(ptrB);
12093     BuildMI(BB, dl, TII->get(PPC::BCC))
12094         .addImm(PPC::PRED_NE)
12095         .addReg(PPC::CR0)
12096         .addMBB(loop1MBB);
12097     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12098     BB->addSuccessor(loop1MBB);
12099     BB->addSuccessor(exitMBB);
12100 
12101     BB = midMBB;
12102     BuildMI(BB, dl, TII->get(StoreMnemonic))
12103         .addReg(dest)
12104         .addReg(ptrA)
12105         .addReg(ptrB);
12106     BB->addSuccessor(exitMBB);
12107 
12108     //  exitMBB:
12109     //   ...
12110     BB = exitMBB;
12111   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12112              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12113     // We must use 64-bit registers for addresses when targeting 64-bit,
12114     // since we're actually doing arithmetic on them.  Other registers
12115     // can be 32-bit.
12116     bool is64bit = Subtarget.isPPC64();
12117     bool isLittleEndian = Subtarget.isLittleEndian();
12118     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12119 
12120     Register dest = MI.getOperand(0).getReg();
12121     Register ptrA = MI.getOperand(1).getReg();
12122     Register ptrB = MI.getOperand(2).getReg();
12123     Register oldval = MI.getOperand(3).getReg();
12124     Register newval = MI.getOperand(4).getReg();
12125     DebugLoc dl = MI.getDebugLoc();
12126 
12127     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12128     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12129     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12130     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12131     F->insert(It, loop1MBB);
12132     F->insert(It, loop2MBB);
12133     F->insert(It, midMBB);
12134     F->insert(It, exitMBB);
12135     exitMBB->splice(exitMBB->begin(), BB,
12136                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12137     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12138 
12139     MachineRegisterInfo &RegInfo = F->getRegInfo();
12140     const TargetRegisterClass *RC =
12141         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12142     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12143 
12144     Register PtrReg = RegInfo.createVirtualRegister(RC);
12145     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12146     Register ShiftReg =
12147         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12148     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12149     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12150     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12151     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12152     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12153     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12154     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12155     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12156     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12157     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12158     Register Ptr1Reg;
12159     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12160     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12161     //  thisMBB:
12162     //   ...
12163     //   fallthrough --> loopMBB
12164     BB->addSuccessor(loop1MBB);
12165 
12166     // The 4-byte load must be aligned, while a char or short may be
12167     // anywhere in the word.  Hence all this nasty bookkeeping code.
12168     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12169     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12170     //   xori shift, shift1, 24 [16]
12171     //   rlwinm ptr, ptr1, 0, 0, 29
12172     //   slw newval2, newval, shift
12173     //   slw oldval2, oldval,shift
12174     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12175     //   slw mask, mask2, shift
12176     //   and newval3, newval2, mask
12177     //   and oldval3, oldval2, mask
12178     // loop1MBB:
12179     //   lwarx tmpDest, ptr
12180     //   and tmp, tmpDest, mask
12181     //   cmpw tmp, oldval3
12182     //   bne- midMBB
12183     // loop2MBB:
12184     //   andc tmp2, tmpDest, mask
12185     //   or tmp4, tmp2, newval3
12186     //   stwcx. tmp4, ptr
12187     //   bne- loop1MBB
12188     //   b exitBB
12189     // midMBB:
12190     //   stwcx. tmpDest, ptr
12191     // exitBB:
12192     //   srw dest, tmpDest, shift
12193     if (ptrA != ZeroReg) {
12194       Ptr1Reg = RegInfo.createVirtualRegister(RC);
12195       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12196           .addReg(ptrA)
12197           .addReg(ptrB);
12198     } else {
12199       Ptr1Reg = ptrB;
12200     }
12201 
12202     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12203     // mode.
12204     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12205         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12206         .addImm(3)
12207         .addImm(27)
12208         .addImm(is8bit ? 28 : 27);
12209     if (!isLittleEndian)
12210       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12211           .addReg(Shift1Reg)
12212           .addImm(is8bit ? 24 : 16);
12213     if (is64bit)
12214       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12215           .addReg(Ptr1Reg)
12216           .addImm(0)
12217           .addImm(61);
12218     else
12219       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12220           .addReg(Ptr1Reg)
12221           .addImm(0)
12222           .addImm(0)
12223           .addImm(29);
12224     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12225         .addReg(newval)
12226         .addReg(ShiftReg);
12227     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12228         .addReg(oldval)
12229         .addReg(ShiftReg);
12230     if (is8bit)
12231       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12232     else {
12233       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12234       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12235           .addReg(Mask3Reg)
12236           .addImm(65535);
12237     }
12238     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12239         .addReg(Mask2Reg)
12240         .addReg(ShiftReg);
12241     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12242         .addReg(NewVal2Reg)
12243         .addReg(MaskReg);
12244     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12245         .addReg(OldVal2Reg)
12246         .addReg(MaskReg);
12247 
12248     BB = loop1MBB;
12249     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12250         .addReg(ZeroReg)
12251         .addReg(PtrReg);
12252     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12253         .addReg(TmpDestReg)
12254         .addReg(MaskReg);
12255     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12256         .addReg(TmpReg)
12257         .addReg(OldVal3Reg);
12258     BuildMI(BB, dl, TII->get(PPC::BCC))
12259         .addImm(PPC::PRED_NE)
12260         .addReg(PPC::CR0)
12261         .addMBB(midMBB);
12262     BB->addSuccessor(loop2MBB);
12263     BB->addSuccessor(midMBB);
12264 
12265     BB = loop2MBB;
12266     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12267         .addReg(TmpDestReg)
12268         .addReg(MaskReg);
12269     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12270         .addReg(Tmp2Reg)
12271         .addReg(NewVal3Reg);
12272     BuildMI(BB, dl, TII->get(PPC::STWCX))
12273         .addReg(Tmp4Reg)
12274         .addReg(ZeroReg)
12275         .addReg(PtrReg);
12276     BuildMI(BB, dl, TII->get(PPC::BCC))
12277         .addImm(PPC::PRED_NE)
12278         .addReg(PPC::CR0)
12279         .addMBB(loop1MBB);
12280     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12281     BB->addSuccessor(loop1MBB);
12282     BB->addSuccessor(exitMBB);
12283 
12284     BB = midMBB;
12285     BuildMI(BB, dl, TII->get(PPC::STWCX))
12286         .addReg(TmpDestReg)
12287         .addReg(ZeroReg)
12288         .addReg(PtrReg);
12289     BB->addSuccessor(exitMBB);
12290 
12291     //  exitMBB:
12292     //   ...
12293     BB = exitMBB;
12294     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12295         .addReg(TmpReg)
12296         .addReg(ShiftReg);
12297   } else if (MI.getOpcode() == PPC::FADDrtz) {
12298     // This pseudo performs an FADD with rounding mode temporarily forced
12299     // to round-to-zero.  We emit this via custom inserter since the FPSCR
12300     // is not modeled at the SelectionDAG level.
12301     Register Dest = MI.getOperand(0).getReg();
12302     Register Src1 = MI.getOperand(1).getReg();
12303     Register Src2 = MI.getOperand(2).getReg();
12304     DebugLoc dl = MI.getDebugLoc();
12305 
12306     MachineRegisterInfo &RegInfo = F->getRegInfo();
12307     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12308 
12309     // Save FPSCR value.
12310     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12311 
12312     // Set rounding mode to round-to-zero.
12313     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12314         .addImm(31)
12315         .addReg(PPC::RM, RegState::ImplicitDefine);
12316 
12317     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12318         .addImm(30)
12319         .addReg(PPC::RM, RegState::ImplicitDefine);
12320 
12321     // Perform addition.
12322     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12323                    .addReg(Src1)
12324                    .addReg(Src2);
12325     if (MI.getFlag(MachineInstr::NoFPExcept))
12326       MIB.setMIFlag(MachineInstr::NoFPExcept);
12327 
12328     // Restore FPSCR value.
12329     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12330   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12331              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12332              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12333              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12334     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12335                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12336                           ? PPC::ANDI8_rec
12337                           : PPC::ANDI_rec;
12338     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12339                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12340 
12341     MachineRegisterInfo &RegInfo = F->getRegInfo();
12342     Register Dest = RegInfo.createVirtualRegister(
12343         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12344 
12345     DebugLoc Dl = MI.getDebugLoc();
12346     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12347         .addReg(MI.getOperand(1).getReg())
12348         .addImm(1);
12349     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12350             MI.getOperand(0).getReg())
12351         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12352   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12353     DebugLoc Dl = MI.getDebugLoc();
12354     MachineRegisterInfo &RegInfo = F->getRegInfo();
12355     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12356     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12357     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12358             MI.getOperand(0).getReg())
12359         .addReg(CRReg);
12360   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12361     DebugLoc Dl = MI.getDebugLoc();
12362     unsigned Imm = MI.getOperand(1).getImm();
12363     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12364     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12365             MI.getOperand(0).getReg())
12366         .addReg(PPC::CR0EQ);
12367   } else if (MI.getOpcode() == PPC::SETRNDi) {
12368     DebugLoc dl = MI.getDebugLoc();
12369     Register OldFPSCRReg = MI.getOperand(0).getReg();
12370 
12371     // Save FPSCR value.
12372     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12373 
12374     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12375     // the following settings:
12376     //   00 Round to nearest
12377     //   01 Round to 0
12378     //   10 Round to +inf
12379     //   11 Round to -inf
12380 
12381     // When the operand is immediate, using the two least significant bits of
12382     // the immediate to set the bits 62:63 of FPSCR.
12383     unsigned Mode = MI.getOperand(1).getImm();
12384     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12385         .addImm(31)
12386         .addReg(PPC::RM, RegState::ImplicitDefine);
12387 
12388     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12389         .addImm(30)
12390         .addReg(PPC::RM, RegState::ImplicitDefine);
12391   } else if (MI.getOpcode() == PPC::SETRND) {
12392     DebugLoc dl = MI.getDebugLoc();
12393 
12394     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12395     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12396     // If the target doesn't have DirectMove, we should use stack to do the
12397     // conversion, because the target doesn't have the instructions like mtvsrd
12398     // or mfvsrd to do this conversion directly.
12399     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12400       if (Subtarget.hasDirectMove()) {
12401         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12402           .addReg(SrcReg);
12403       } else {
12404         // Use stack to do the register copy.
12405         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12406         MachineRegisterInfo &RegInfo = F->getRegInfo();
12407         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12408         if (RC == &PPC::F8RCRegClass) {
12409           // Copy register from F8RCRegClass to G8RCRegclass.
12410           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12411                  "Unsupported RegClass.");
12412 
12413           StoreOp = PPC::STFD;
12414           LoadOp = PPC::LD;
12415         } else {
12416           // Copy register from G8RCRegClass to F8RCRegclass.
12417           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12418                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12419                  "Unsupported RegClass.");
12420         }
12421 
12422         MachineFrameInfo &MFI = F->getFrameInfo();
12423         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12424 
12425         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12426             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12427             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12428             MFI.getObjectAlign(FrameIdx));
12429 
12430         // Store the SrcReg into the stack.
12431         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12432           .addReg(SrcReg)
12433           .addImm(0)
12434           .addFrameIndex(FrameIdx)
12435           .addMemOperand(MMOStore);
12436 
12437         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12438             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12439             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12440             MFI.getObjectAlign(FrameIdx));
12441 
12442         // Load from the stack where SrcReg is stored, and save to DestReg,
12443         // so we have done the RegClass conversion from RegClass::SrcReg to
12444         // RegClass::DestReg.
12445         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12446           .addImm(0)
12447           .addFrameIndex(FrameIdx)
12448           .addMemOperand(MMOLoad);
12449       }
12450     };
12451 
12452     Register OldFPSCRReg = MI.getOperand(0).getReg();
12453 
12454     // Save FPSCR value.
12455     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12456 
12457     // When the operand is gprc register, use two least significant bits of the
12458     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12459     //
12460     // copy OldFPSCRTmpReg, OldFPSCRReg
12461     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12462     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12463     // copy NewFPSCRReg, NewFPSCRTmpReg
12464     // mtfsf 255, NewFPSCRReg
12465     MachineOperand SrcOp = MI.getOperand(1);
12466     MachineRegisterInfo &RegInfo = F->getRegInfo();
12467     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12468 
12469     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12470 
12471     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12472     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12473 
12474     // The first operand of INSERT_SUBREG should be a register which has
12475     // subregisters, we only care about its RegClass, so we should use an
12476     // IMPLICIT_DEF register.
12477     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12478     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12479       .addReg(ImDefReg)
12480       .add(SrcOp)
12481       .addImm(1);
12482 
12483     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12484     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12485       .addReg(OldFPSCRTmpReg)
12486       .addReg(ExtSrcReg)
12487       .addImm(0)
12488       .addImm(62);
12489 
12490     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12491     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12492 
12493     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12494     // bits of FPSCR.
12495     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12496       .addImm(255)
12497       .addReg(NewFPSCRReg)
12498       .addImm(0)
12499       .addImm(0);
12500   } else if (MI.getOpcode() == PPC::SETFLM) {
12501     DebugLoc Dl = MI.getDebugLoc();
12502 
12503     // Result of setflm is previous FPSCR content, so we need to save it first.
12504     Register OldFPSCRReg = MI.getOperand(0).getReg();
12505     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12506 
12507     // Put bits in 32:63 to FPSCR.
12508     Register NewFPSCRReg = MI.getOperand(1).getReg();
12509     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12510         .addImm(255)
12511         .addReg(NewFPSCRReg)
12512         .addImm(0)
12513         .addImm(0);
12514   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12515              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12516     return emitProbedAlloca(MI, BB);
12517   } else {
12518     llvm_unreachable("Unexpected instr type to insert");
12519   }
12520 
12521   MI.eraseFromParent(); // The pseudo instruction is gone now.
12522   return BB;
12523 }
12524 
12525 //===----------------------------------------------------------------------===//
12526 // Target Optimization Hooks
12527 //===----------------------------------------------------------------------===//
12528 
12529 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12530   // For the estimates, convergence is quadratic, so we essentially double the
12531   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12532   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12533   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12534   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12535   if (VT.getScalarType() == MVT::f64)
12536     RefinementSteps++;
12537   return RefinementSteps;
12538 }
12539 
12540 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12541                                            int Enabled, int &RefinementSteps,
12542                                            bool &UseOneConstNR,
12543                                            bool Reciprocal) const {
12544   EVT VT = Operand.getValueType();
12545   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12546       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12547       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12548       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12549     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12550       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12551 
12552     // The Newton-Raphson computation with a single constant does not provide
12553     // enough accuracy on some CPUs.
12554     UseOneConstNR = !Subtarget.needsTwoConstNR();
12555     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12556   }
12557   return SDValue();
12558 }
12559 
12560 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12561                                             int Enabled,
12562                                             int &RefinementSteps) const {
12563   EVT VT = Operand.getValueType();
12564   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12565       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12566       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12567       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12568     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12569       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12570     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12571   }
12572   return SDValue();
12573 }
12574 
12575 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12576   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12577   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12578   // enabled for division), this functionality is redundant with the default
12579   // combiner logic (once the division -> reciprocal/multiply transformation
12580   // has taken place). As a result, this matters more for older cores than for
12581   // newer ones.
12582 
12583   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12584   // reciprocal if there are two or more FDIVs (for embedded cores with only
12585   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12586   switch (Subtarget.getCPUDirective()) {
12587   default:
12588     return 3;
12589   case PPC::DIR_440:
12590   case PPC::DIR_A2:
12591   case PPC::DIR_E500:
12592   case PPC::DIR_E500mc:
12593   case PPC::DIR_E5500:
12594     return 2;
12595   }
12596 }
12597 
12598 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12599 // collapsed, and so we need to look through chains of them.
12600 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12601                                      int64_t& Offset, SelectionDAG &DAG) {
12602   if (DAG.isBaseWithConstantOffset(Loc)) {
12603     Base = Loc.getOperand(0);
12604     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12605 
12606     // The base might itself be a base plus an offset, and if so, accumulate
12607     // that as well.
12608     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12609   }
12610 }
12611 
// Return true if the access of type VT at address Loc lies exactly Dist
// elements (of Bytes bytes each) after the access performed by Base.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  // The access at Loc must be exactly one stride element wide.
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Frame-index addresses: compare the static frame offsets directly.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    // Both stack objects must be exactly one element in size; otherwise the
    // offset equality below would not prove the accesses are adjacent.
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  // General case: decompose both addresses as (base + constant offset),
  // looking through uncollapsed chains of adds, and compare.
  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  // Fall back to matching both addresses as global-value + offset.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}
12649 
12650 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12651 // not enforce equality of the chain operands.
12652 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12653                             unsigned Bytes, int Dist,
12654                             SelectionDAG &DAG) {
12655   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12656     EVT VT = LS->getMemoryVT();
12657     SDValue Loc = LS->getBasePtr();
12658     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12659   }
12660 
12661   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12662     EVT VT;
12663     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12664     default: return false;
12665     case Intrinsic::ppc_altivec_lvx:
12666     case Intrinsic::ppc_altivec_lvxl:
12667     case Intrinsic::ppc_vsx_lxvw4x:
12668     case Intrinsic::ppc_vsx_lxvw4x_be:
12669       VT = MVT::v4i32;
12670       break;
12671     case Intrinsic::ppc_vsx_lxvd2x:
12672     case Intrinsic::ppc_vsx_lxvd2x_be:
12673       VT = MVT::v2f64;
12674       break;
12675     case Intrinsic::ppc_altivec_lvebx:
12676       VT = MVT::i8;
12677       break;
12678     case Intrinsic::ppc_altivec_lvehx:
12679       VT = MVT::i16;
12680       break;
12681     case Intrinsic::ppc_altivec_lvewx:
12682       VT = MVT::i32;
12683       break;
12684     }
12685 
12686     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12687   }
12688 
12689   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12690     EVT VT;
12691     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12692     default: return false;
12693     case Intrinsic::ppc_altivec_stvx:
12694     case Intrinsic::ppc_altivec_stvxl:
12695     case Intrinsic::ppc_vsx_stxvw4x:
12696       VT = MVT::v4i32;
12697       break;
12698     case Intrinsic::ppc_vsx_stxvd2x:
12699       VT = MVT::v2f64;
12700       break;
12701     case Intrinsic::ppc_vsx_stxvw4x_be:
12702       VT = MVT::v4i32;
12703       break;
12704     case Intrinsic::ppc_vsx_stxvd2x_be:
12705       VT = MVT::v2f64;
12706       break;
12707     case Intrinsic::ppc_altivec_stvebx:
12708       VT = MVT::i8;
12709       break;
12710     case Intrinsic::ppc_altivec_stvehx:
12711       VT = MVT::i16;
12712       break;
12713     case Intrinsic::ppc_altivec_stvewx:
12714       VT = MVT::i32;
12715       break;
12716     }
12717 
12718     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12719   }
12720 
12721   return false;
12722 }
12723 
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      // Keep walking up through this memory operation's own chain.
      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      // Token factors merge several chains; follow all of them.
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      // Anything else terminates the upward walk; remember it as a root for
      // the downward phase.
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      // Descend only into memory operations chained on this node, and into
      // token factors; any other user is not part of the chain structure.
      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}
12791 
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
/// \p Size is the width (in bits) of the largest legal integer type, to which
/// both operands are extended. \p Complement XORs the final bit, and \p Swap
/// exchanges the operands; together they select which unsigned comparison
/// (ULT/ULE/UGT/UGE) the subtraction implements.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers. Because both operands were zero-extended from
  // a strictly smaller width, the subtraction's sign bit is set exactly when
  // Op0 < Op1 as unsigned values.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  // Narrow back down to the i1 the SETCC users expect.
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
12827 
12828 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12829                                                   DAGCombinerInfo &DCI) const {
12830   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12831 
12832   SelectionDAG &DAG = DCI.DAG;
12833   SDLoc DL(N);
12834 
12835   // Size of integers being compared has a critical role in the following
12836   // analysis, so we prefer to do this when all types are legal.
12837   if (!DCI.isAfterLegalizeDAG())
12838     return SDValue();
12839 
12840   // If all users of SETCC extend its value to a legal integer type
12841   // then we replace SETCC with a subtraction
12842   for (SDNode::use_iterator UI = N->use_begin(),
12843        UE = N->use_end(); UI != UE; ++UI) {
12844     if (UI->getOpcode() != ISD::ZERO_EXTEND)
12845       return SDValue();
12846   }
12847 
12848   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12849   auto OpSize = N->getOperand(0).getValueSizeInBits();
12850 
12851   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12852 
12853   if (OpSize < Size) {
12854     switch (CC) {
12855     default: break;
12856     case ISD::SETULT:
12857       return generateEquivalentSub(N, Size, false, false, DL, DAG);
12858     case ISD::SETULE:
12859       return generateEquivalentSub(N, Size, true, true, DL, DAG);
12860     case ISD::SETUGT:
12861       return generateEquivalentSub(N, Size, false, true, DL, DAG);
12862     case ISD::SETUGE:
12863       return generateEquivalentSub(N, Size, true, false, DL, DAG);
12864     }
12865   }
12866 
12867   return SDValue();
12868 }
12869 
// Combine an i1-producing truncation (or the implicit truncation in a
// SETCC/SELECT_CC) of a cluster of bit operations fed by i1 extensions, so
// the whole cluster is performed on i1 values in CR bits instead of GPRs.
// Returns the replacement value, or an empty SDValue if no change was made.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed compare: both operands must be fully sign-extended from i1.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: all bits above the low bit must be known zero.
      // If not, this SETCC may still be profitably turned into a subtract.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);

      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs: extensions of i1 (or constants) feeding the cluster.
  // BinOps: bit operations still to be visited. PromOps: the cluster of
  // operations that will be re-created with i1 result type.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist with N's operand(s): only operand 0 for a TRUNCATE,
  // both compared operands for SETCC/SELECT_CC.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Same self-containment check for the to-be-promoted operations.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold the cluster in handles so RAUW calls below cannot invalidate the
  // nodes we still intend to process.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first to-be-promoted operand: selects keep their
    // condition (and SELECT_CC its two compared values) unpromoted.
    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
13150 
13151 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13152                                                   DAGCombinerInfo &DCI) const {
13153   SelectionDAG &DAG = DCI.DAG;
13154   SDLoc dl(N);
13155 
13156   // If we're tracking CR bits, we need to be careful that we don't have:
13157   //   zext(binary-ops(trunc(x), trunc(y)))
13158   // or
13159   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13160   // such that we're unnecessarily moving things into CR bits that can more
13161   // efficiently stay in GPRs. Note that if we're not certain that the high
13162   // bits are set as required by the final extension, we still may need to do
13163   // some masking to get the proper behavior.
13164 
13165   // This same functionality is important on PPC64 when dealing with
13166   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13167   // the return values of functions. Because it is so similar, it is handled
13168   // here as well.
13169 
13170   if (N->getValueType(0) != MVT::i32 &&
13171       N->getValueType(0) != MVT::i64)
13172     return SDValue();
13173 
13174   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13175         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13176     return SDValue();
13177 
13178   if (N->getOperand(0).getOpcode() != ISD::AND &&
13179       N->getOperand(0).getOpcode() != ISD::OR  &&
13180       N->getOperand(0).getOpcode() != ISD::XOR &&
13181       N->getOperand(0).getOpcode() != ISD::SELECT &&
13182       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13183     return SDValue();
13184 
13185   SmallVector<SDValue, 4> Inputs;
13186   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13187   SmallPtrSet<SDNode *, 16> Visited;
13188 
13189   // Visit all inputs, collect all binary operations (and, or, xor and
13190   // select) that are all fed by truncations.
13191   while (!BinOps.empty()) {
13192     SDValue BinOp = BinOps.back();
13193     BinOps.pop_back();
13194 
13195     if (!Visited.insert(BinOp.getNode()).second)
13196       continue;
13197 
13198     PromOps.push_back(BinOp);
13199 
13200     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13201       // The condition of the select is not promoted.
13202       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13203         continue;
13204       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13205         continue;
13206 
13207       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13208           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13209         Inputs.push_back(BinOp.getOperand(i));
13210       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13211                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13212                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13213                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13214                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13215         BinOps.push_back(BinOp.getOperand(i));
13216       } else {
13217         // We have an input that is not a truncation or another binary
13218         // operation; we'll abort this transformation.
13219         return SDValue();
13220       }
13221     }
13222   }
13223 
13224   // The operands of a select that must be truncated when the select is
13225   // promoted because the operand is actually part of the to-be-promoted set.
13226   DenseMap<SDNode *, EVT> SelectTruncOp[2];
13227 
13228   // Make sure that this is a self-contained cluster of operations (which
13229   // is not quite the same thing as saying that everything has only one
13230   // use).
13231   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13232     if (isa<ConstantSDNode>(Inputs[i]))
13233       continue;
13234 
13235     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13236                               UE = Inputs[i].getNode()->use_end();
13237          UI != UE; ++UI) {
13238       SDNode *User = *UI;
13239       if (User != N && !Visited.count(User))
13240         return SDValue();
13241 
13242       // If we're going to promote the non-output-value operand(s) or SELECT or
13243       // SELECT_CC, record them for truncation.
13244       if (User->getOpcode() == ISD::SELECT) {
13245         if (User->getOperand(0) == Inputs[i])
13246           SelectTruncOp[0].insert(std::make_pair(User,
13247                                     User->getOperand(0).getValueType()));
13248       } else if (User->getOpcode() == ISD::SELECT_CC) {
13249         if (User->getOperand(0) == Inputs[i])
13250           SelectTruncOp[0].insert(std::make_pair(User,
13251                                     User->getOperand(0).getValueType()));
13252         if (User->getOperand(1) == Inputs[i])
13253           SelectTruncOp[1].insert(std::make_pair(User,
13254                                     User->getOperand(1).getValueType()));
13255       }
13256     }
13257   }
13258 
13259   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13260     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13261                               UE = PromOps[i].getNode()->use_end();
13262          UI != UE; ++UI) {
13263       SDNode *User = *UI;
13264       if (User != N && !Visited.count(User))
13265         return SDValue();
13266 
13267       // If we're going to promote the non-output-value operand(s) or SELECT or
13268       // SELECT_CC, record them for truncation.
13269       if (User->getOpcode() == ISD::SELECT) {
13270         if (User->getOperand(0) == PromOps[i])
13271           SelectTruncOp[0].insert(std::make_pair(User,
13272                                     User->getOperand(0).getValueType()));
13273       } else if (User->getOpcode() == ISD::SELECT_CC) {
13274         if (User->getOperand(0) == PromOps[i])
13275           SelectTruncOp[0].insert(std::make_pair(User,
13276                                     User->getOperand(0).getValueType()));
13277         if (User->getOperand(1) == PromOps[i])
13278           SelectTruncOp[1].insert(std::make_pair(User,
13279                                     User->getOperand(1).getValueType()));
13280       }
13281     }
13282   }
13283 
13284   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13285   bool ReallyNeedsExt = false;
13286   if (N->getOpcode() != ISD::ANY_EXTEND) {
13287     // If all of the inputs are not already sign/zero extended, then
13288     // we'll still need to do that at the end.
13289     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13290       if (isa<ConstantSDNode>(Inputs[i]))
13291         continue;
13292 
13293       unsigned OpBits =
13294         Inputs[i].getOperand(0).getValueSizeInBits();
13295       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13296 
13297       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13298            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13299                                   APInt::getHighBitsSet(OpBits,
13300                                                         OpBits-PromBits))) ||
13301           (N->getOpcode() == ISD::SIGN_EXTEND &&
13302            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13303              (OpBits-(PromBits-1)))) {
13304         ReallyNeedsExt = true;
13305         break;
13306       }
13307     }
13308   }
13309 
13310   // Replace all inputs, either with the truncation operand, or a
13311   // truncation or extension to the final output type.
13312   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13313     // Constant inputs need to be replaced with the to-be-promoted nodes that
13314     // use them because they might have users outside of the cluster of
13315     // promoted nodes.
13316     if (isa<ConstantSDNode>(Inputs[i]))
13317       continue;
13318 
13319     SDValue InSrc = Inputs[i].getOperand(0);
13320     if (Inputs[i].getValueType() == N->getValueType(0))
13321       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13322     else if (N->getOpcode() == ISD::SIGN_EXTEND)
13323       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13324         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13325     else if (N->getOpcode() == ISD::ZERO_EXTEND)
13326       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13327         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13328     else
13329       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13330         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13331   }
13332 
13333   std::list<HandleSDNode> PromOpHandles;
13334   for (auto &PromOp : PromOps)
13335     PromOpHandles.emplace_back(PromOp);
13336 
13337   // Replace all operations (these are all the same, but have a different
13338   // (promoted) return type). DAG.getNode will validate that the types of
13339   // a binary operator match, so go through the list in reverse so that
13340   // we've likely promoted both operands first.
13341   while (!PromOpHandles.empty()) {
13342     SDValue PromOp = PromOpHandles.back().getValue();
13343     PromOpHandles.pop_back();
13344 
13345     unsigned C;
13346     switch (PromOp.getOpcode()) {
13347     default:             C = 0; break;
13348     case ISD::SELECT:    C = 1; break;
13349     case ISD::SELECT_CC: C = 2; break;
13350     }
13351 
13352     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13353          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13354         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13355          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13356       // The to-be-promoted operands of this node have not yet been
13357       // promoted (this should be rare because we're going through the
13358       // list backward, but if one of the operands has several users in
13359       // this cluster of to-be-promoted nodes, it is possible).
13360       PromOpHandles.emplace_front(PromOp);
13361       continue;
13362     }
13363 
13364     // For SELECT and SELECT_CC nodes, we do a similar check for any
13365     // to-be-promoted comparison inputs.
13366     if (PromOp.getOpcode() == ISD::SELECT ||
13367         PromOp.getOpcode() == ISD::SELECT_CC) {
13368       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13369            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13370           (SelectTruncOp[1].count(PromOp.getNode()) &&
13371            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13372         PromOpHandles.emplace_front(PromOp);
13373         continue;
13374       }
13375     }
13376 
13377     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13378                                 PromOp.getNode()->op_end());
13379 
13380     // If this node has constant inputs, then they'll need to be promoted here.
13381     for (unsigned i = 0; i < 2; ++i) {
13382       if (!isa<ConstantSDNode>(Ops[C+i]))
13383         continue;
13384       if (Ops[C+i].getValueType() == N->getValueType(0))
13385         continue;
13386 
13387       if (N->getOpcode() == ISD::SIGN_EXTEND)
13388         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13389       else if (N->getOpcode() == ISD::ZERO_EXTEND)
13390         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13391       else
13392         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13393     }
13394 
13395     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13396     // truncate them again to the original value type.
13397     if (PromOp.getOpcode() == ISD::SELECT ||
13398         PromOp.getOpcode() == ISD::SELECT_CC) {
13399       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13400       if (SI0 != SelectTruncOp[0].end())
13401         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13402       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13403       if (SI1 != SelectTruncOp[1].end())
13404         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13405     }
13406 
13407     DAG.ReplaceAllUsesOfValueWith(PromOp,
13408       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13409   }
13410 
13411   // Now we're left with the initial extension itself.
13412   if (!ReallyNeedsExt)
13413     return N->getOperand(0);
13414 
13415   // To zero extend, just mask off everything except for the first bit (in the
13416   // i1 case).
13417   if (N->getOpcode() == ISD::ZERO_EXTEND)
13418     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13419                        DAG.getConstant(APInt::getLowBitsSet(
13420                                          N->getValueSizeInBits(0), PromBits),
13421                                        dl, N->getValueType(0)));
13422 
13423   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13424          "Invalid extension type");
13425   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13426   SDValue ShiftCst =
13427       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13428   return DAG.getNode(
13429       ISD::SRA, dl, N->getValueType(0),
13430       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13431       ShiftCst);
13432 }
13433 
13434 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13435                                         DAGCombinerInfo &DCI) const {
13436   assert(N->getOpcode() == ISD::SETCC &&
13437          "Should be called with a SETCC node");
13438 
13439   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13440   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13441     SDValue LHS = N->getOperand(0);
13442     SDValue RHS = N->getOperand(1);
13443 
13444     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13445     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13446         LHS.hasOneUse())
13447       std::swap(LHS, RHS);
13448 
13449     // x == 0-y --> x+y == 0
13450     // x != 0-y --> x+y != 0
13451     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13452         RHS.hasOneUse()) {
13453       SDLoc DL(N);
13454       SelectionDAG &DAG = DCI.DAG;
13455       EVT VT = N->getValueType(0);
13456       EVT OpVT = LHS.getValueType();
13457       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13458       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13459     }
13460   }
13461 
13462   return DAGCombineTruncBoolExt(N, DCI);
13463 }
13464 
13465 // Is this an extending load from an f32 to an f64?
13466 static bool isFPExtLoad(SDValue Op) {
13467   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13468     return LD->getExtensionType() == ISD::EXTLOAD &&
13469       Op.getValueType() == MVT::f64;
13470   return false;
13471 }
13472 
/// Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    // FCTIWZ/FCTIWUZ produce 32-bit results; the FCTID* forms produce 64-bit.
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    // First pass: every element must be the same kind of conversion moved out
    // of a VSR, or the combine does not apply.
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    // Match the signedness of the single vector conversion to that of the
    // original per-element conversions.
    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    // Build one vector of the FP sources and convert it with a single node.
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}
13560 
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non byte-sized type, as they can't be consecutive.
  if (!N->getValueType(0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(0);
  bool IsRoundOfExtLoad = false;

  // Detect the (fp_round (extload ...)) form; the fp_round is looked through
  // below when checking adjacency of the underlying loads.
  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  // Walk each adjacent pair of elements, checking whether the underlying
  // loads are consecutive in memory in ascending or descending order; give
  // up as soon as neither ordering remains possible.
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
      N->getOperand(i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
      InputsAreConsecutiveLoads = false;
    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue FirstLoadOp =
    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
  SDValue LastLoadOp =
    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
                       N->getOperand(N->getNumOperands()-1);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
  // Ascending order: replace the whole build_vector with one wide load from
  // the first element's address.
  if (InputsAreConsecutiveLoads) {
    assert(LD1 && "Input needs to be a LoadSDNode.");
    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
                       LD1->getBasePtr(), LD1->getPointerInfo(),
                       LD1->getAlignment());
  }
  // Descending order: load from the last element's address (the lowest in
  // memory) and reverse the lanes with a shuffle.
  if (InputsAreReverseConsecutive) {
    assert(LDL && "Input needs to be a LoadSDNode.");
    SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
                               LDL->getBasePtr(), LDL->getPointerInfo(),
                               LDL->getAlignment());
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(i);

    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
                                DAG.getUNDEF(N->getValueType(0)), Ops);
  }
  return SDValue();
}
13653 
13654 // This function adds the required vector_shuffle needed to get
13655 // the elements of the vector extract in the correct position
13656 // as specified by the CorrectElems encoding.
13657 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13658                                       SDValue Input, uint64_t Elems,
13659                                       uint64_t CorrectElems) {
13660   SDLoc dl(N);
13661 
13662   unsigned NumElems = Input.getValueType().getVectorNumElements();
13663   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13664 
13665   // Knowing the element indices being extracted from the original
13666   // vector and the order in which they're being inserted, just put
13667   // them at element indices required for the instruction.
13668   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13669     if (DAG.getDataLayout().isLittleEndian())
13670       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13671     else
13672       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13673     CorrectElems = CorrectElems >> 8;
13674     Elems = Elems >> 8;
13675   }
13676 
13677   SDValue Shuffle =
13678       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13679                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13680 
13681   EVT VT = N->getValueType(0);
13682   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13683 
13684   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13685                                Input.getValueType().getVectorElementType(),
13686                                VT.getVectorNumElements());
13687   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13688                      DAG.getValueType(ExtVT));
13689 }
13690 
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create
// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices.
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  // Accumulated encoding of the extract indices seen so far (one byte per
  // operand, in the same nibble layout as TargetElems).
  uint64_t Elems = 0;
  int Index;
  // The single source vector all extracts must come from.
  SDValue Input;

  // Returns true if Op is (sign_extend[_inreg] (extract_vector_elt Input, C))
  // and records the constant extract index into Elems. All operands must
  // extract from the same Input vector.
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    // Append this index as a new low byte: LE indices live in the low
    // nibble, BE indices in the high nibble (to match TargetElems).
    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts,
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // Select the encoding table entry from the input/output element widths;
  // each supported (input, output) pair has a unique width sum.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  // Keep only the nibbles relevant for the current endianness before
  // comparing against the observed indices.
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  // If the vector extract indices are not the ones the hardware extend
  // instructions use, add the appropriate vector_shuffle to fix them up.
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
13789 
13790 // Look for the pattern of a load from a narrow width to i128, feeding
13791 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13792 // (LXVRZX). This node represents a zero extending load that will be matched
13793 // to the Load VSX Vector Rightmost instructions.
13794 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13795   SDLoc DL(N);
13796 
13797   // This combine is only eligible for a BUILD_VECTOR of v1i128.
13798   if (N->getValueType(0) != MVT::v1i128)
13799     return SDValue();
13800 
13801   SDValue Operand = N->getOperand(0);
13802   // Proceed with the transformation if the operand to the BUILD_VECTOR
13803   // is a load instruction.
13804   if (Operand.getOpcode() != ISD::LOAD)
13805     return SDValue();
13806 
13807   LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13808   EVT MemoryType = LD->getMemoryVT();
13809 
13810   // This transformation is only valid if the we are loading either a byte,
13811   // halfword, word, or doubleword.
13812   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13813                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
13814 
13815   // Ensure that the load from the narrow width is being zero extended to i128.
13816   if (!ValidLDType ||
13817       (LD->getExtensionType() != ISD::ZEXTLOAD &&
13818        LD->getExtensionType() != ISD::EXTLOAD))
13819     return SDValue();
13820 
13821   SDValue LoadOps[] = {
13822       LD->getChain(), LD->getBasePtr(),
13823       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13824 
13825   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13826                                  DAG.getVTList(MVT::v1i128, MVT::Other),
13827                                  LoadOps, MemoryType, LD->getMemOperand());
13828 }
13829 
// Combine a BUILD_VECTOR into cheaper equivalent forms on VSX-capable
// subtargets. Tries, in order: merging per-element fp-to-int conversions
// into one vector conversion, folding consecutive loads into a vector load,
// matching the P9 vector integer extend instructions, matching the P10
// LXVRZX zero-extending load, and finally matching the paired
// int-to-fp-from-extract pattern into [SU]INT_VEC_TO_FP.
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // All of these combines rely on VSX instructions.
  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1()) {
    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
    if (BVOfZLoad)
      return BVOfZLoad;
  }

  // The remaining pattern only produces v2f64 results.
  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  // Both conversions must have the same signedness.
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extracts must use constant indices from the same v4i32 source.
  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();

  // The pair of extracted elements selects which half of the source vector
  // feeds the conversion; the half index is endian-dependent.
  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64,
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}
13920 
// Combine [SU]INT_TO_FP nodes: directly convert sub-word integer loads via
// LXSIZX (P9), and eliminate the store/load round trip for
// fp -> int -> fp conversion sequences.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  // The conversion instructions used below require hardware FP and 64-bit
  // support.
  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  // On P9, a sub-word (i8/i16) integer load feeding the conversion can be
  // done directly in a VSR with LXSIZX, avoiding a GPR->VSR transfer.
  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    // Width of the load in bytes (1 for i8, 2 for i16).
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }


  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    // The FCTID*/FCFID* nodes below operate on f64; widen f32 sources.
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    // Re-materialize the round trip entirely in FP registers.
    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    // Without FPCVT an f32 result needs an explicit round from f64.
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
14024 
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
//
// On little endian subtargets that need swaps for VSX memory ops, a vector
// load is emitted as a PPCISD::LXVD2X followed by a PPCISD::XXSWAPD that
// fixes up the element order, with a final bitcast if the requested type
// is not v2f64.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  // Gather the chain, base pointer, and memory operand from either a
  // normal load node or a load-like target intrinsic.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // The load is always done as v2f64; any type fixup happens via the
  // bitcast below.
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  // Swap the doublewords back into the requested element order.
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}
14092 
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
//
// Mirror of expandVSXLoadForLE: the source value is bitcast to v2f64 if
// necessary, swapped with PPCISD::XXSWAPD, and stored with
// PPCISD::STXVD2X.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  // Gather the chain, base pointer, stored-value operand index, and memory
  // operand from either a normal store node or a store-like intrinsic.
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone.  For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  // Swap the doublewords before the store so memory layout is correct.
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
14158 
// Handle DAG combine for STORE (FP_TO_INT F).
//
// Folds a float-to-int conversion followed by a store into a single
// PPCISD::ST_VSR_SCAL_INT memory node (FP_TO_[SU]INT_IN_VSR + store from
// VSR), avoiding the round trip through a GPR. Returns an empty SDValue
// when the pattern does not apply.
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  // Val is the FP source of the conversion; Op1VT the integer type being
  // stored; ResVT the FP source type.
  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  if (!isTypeLegal(ResVT))
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  // Bail out for ppc_fp128 sources, pre-P8 subtargets, truncating stores,
  // and unsupported integer widths.
  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Extend f32 values to f64
  if (ResVT.getScalarSizeInBits() == 32) {
    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
    DCI.AddToWorklist(Val.getNode());
  }

  // Set signed or unsigned conversion opcode.
  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
                          PPCISD::FP_TO_SINT_IN_VSR :
                          PPCISD::FP_TO_UINT_IN_VSR;

  Val = DAG.getNode(ConvOpcode,
                    dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
  DCI.AddToWorklist(Val.getNode());

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  // Operands: chain, converted value, base pointer, byte count, stored VT.
  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
                    DAG.getIntPtrConstant(ByteSize, dl, false),
                    DAG.getValueType(Op1VT) };

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  DCI.AddToWorklist(Val.getNode());
  return Val;
}
14215 
14216 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14217   // Check that the source of the element keeps flipping
14218   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14219   bool PrevElemFromFirstVec = Mask[0] < NumElts;
14220   for (int i = 1, e = Mask.size(); i < e; i++) {
14221     if (PrevElemFromFirstVec && Mask[i] < NumElts)
14222       return false;
14223     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14224       return false;
14225     PrevElemFromFirstVec = !PrevElemFromFirstVec;
14226   }
14227   return true;
14228 }
14229 
14230 static bool isSplatBV(SDValue Op) {
14231   if (Op.getOpcode() != ISD::BUILD_VECTOR)
14232     return false;
14233   SDValue FirstOp;
14234 
14235   // Find first non-undef input.
14236   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14237     FirstOp = Op.getOperand(i);
14238     if (!FirstOp.isUndef())
14239       break;
14240   }
14241 
14242   // All inputs are undef or the same as the first non-undef input.
14243   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14244     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14245       return false;
14246   return true;
14247 }
14248 
14249 static SDValue isScalarToVec(SDValue Op) {
14250   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14251     return Op;
14252   if (Op.getOpcode() != ISD::BITCAST)
14253     return SDValue();
14254   Op = Op.getOperand(0);
14255   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14256     return Op;
14257   return SDValue();
14258 }
14259 
14260 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14261                                             int LHSMaxIdx, int RHSMinIdx,
14262                                             int RHSMaxIdx, int HalfVec) {
14263   for (int i = 0, e = ShuffV.size(); i < e; i++) {
14264     int Idx = ShuffV[i];
14265     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14266       ShuffV[i] += HalfVec;
14267   }
14268   return;
14269 }
14270 
14271 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14272 // the original is:
14273 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14274 // In such a case, just change the shuffle mask to extract the element
14275 // from the permuted index.
14276 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14277   SDLoc dl(OrigSToV);
14278   EVT VT = OrigSToV.getValueType();
14279   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14280          "Expecting a SCALAR_TO_VECTOR here");
14281   SDValue Input = OrigSToV.getOperand(0);
14282 
14283   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14284     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14285     SDValue OrigVector = Input.getOperand(0);
14286 
14287     // Can't handle non-const element indices or different vector types
14288     // for the input to the extract and the output of the scalar_to_vector.
14289     if (Idx && VT == OrigVector.getValueType()) {
14290       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14291       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14292       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14293     }
14294   }
14295   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14296                      OrigSToV.getOperand(0));
14297 }
14298 
// On little endian subtargets, combine shuffles such as:
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
// into:
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
// because the latter can be matched to a single instruction merge.
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
// to put the value into element zero. Adjust the shuffle mask so that the
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Res defaults to the unmodified shuffle; returned as-is if no combine
  // applies.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);

  // None of these combines are useful on big endian systems since the ISA
  // already has a big endian bias.
  if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    if (SToVLHS) {
      // Set up the values for the shuffle vector fixup.
      // Mask entries below LHSMaxIdx refer to element zero of the LHS input.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      SToVLHS = getSToVPermuted(SToVLHS, DAG);
      if (SToVLHS.getValueType() != LHS.getValueType())
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
      LHS = SToVLHS;
    }
    if (SToVRHS) {
      // RHS mask entries start at NumEltsOut; those below RHSMaxIdx refer to
      // element zero of the RHS input.
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      SToVRHS = getSToVPermuted(SToVRHS, DAG);
      if (SToVRHS.getValueType() != RHS.getValueType())
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
      RHS = SToVRHS;
    }

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
    // HalfVec to refer to the corresponding element in the permuted vector.
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(RHS))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  // Example (even elements from first vector):
  // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
  if (Mask[0] < NumElts)
    for (int i = 1, e = Mask.size(); i < e; i += 2)
      ShuffV[i] = (ShuffV[i - 1] + NumElts);
  // Example (odd elements from first vector):
  // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
  else
    for (int i = 0, e = Mask.size(); i < e; i += 2)
      ShuffV[i] = (ShuffV[i + 1] + NumElts);

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
  RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);

  Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
  return Res;
}
14419 
14420 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14421                                                 LSBaseSDNode *LSBase,
14422                                                 DAGCombinerInfo &DCI) const {
14423   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14424         "Not a reverse memop pattern!");
14425 
14426   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14427     auto Mask = SVN->getMask();
14428     int i = 0;
14429     auto I = Mask.rbegin();
14430     auto E = Mask.rend();
14431 
14432     for (; I != E; ++I) {
14433       if (*I != i)
14434         return false;
14435       i++;
14436     }
14437     return true;
14438   };
14439 
14440   SelectionDAG &DAG = DCI.DAG;
14441   EVT VT = SVN->getValueType(0);
14442 
14443   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14444     return SDValue();
14445 
14446   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14447   // See comment in PPCVSXSwapRemoval.cpp.
14448   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14449   if (!Subtarget.hasP9Vector())
14450     return SDValue();
14451 
14452   if(!IsElementReverse(SVN))
14453     return SDValue();
14454 
14455   if (LSBase->getOpcode() == ISD::LOAD) {
14456     SDLoc dl(SVN);
14457     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14458     return DAG.getMemIntrinsicNode(
14459         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14460         LSBase->getMemoryVT(), LSBase->getMemOperand());
14461   }
14462 
14463   if (LSBase->getOpcode() == ISD::STORE) {
14464     SDLoc dl(LSBase);
14465     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14466                           LSBase->getBasePtr()};
14467     return DAG.getMemIntrinsicNode(
14468         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14469         LSBase->getMemoryVT(), LSBase->getMemOperand());
14470   }
14471 
14472   llvm_unreachable("Expected a load or store node here");
14473 }
14474 
14475 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14476                                              DAGCombinerInfo &DCI) const {
14477   SelectionDAG &DAG = DCI.DAG;
14478   SDLoc dl(N);
14479   switch (N->getOpcode()) {
14480   default: break;
14481   case ISD::ADD:
14482     return combineADD(N, DCI);
14483   case ISD::SHL:
14484     return combineSHL(N, DCI);
14485   case ISD::SRA:
14486     return combineSRA(N, DCI);
14487   case ISD::SRL:
14488     return combineSRL(N, DCI);
14489   case ISD::MUL:
14490     return combineMUL(N, DCI);
14491   case ISD::FMA:
14492   case PPCISD::FNMSUB:
14493     return combineFMALike(N, DCI);
14494   case PPCISD::SHL:
14495     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14496         return N->getOperand(0);
14497     break;
14498   case PPCISD::SRL:
14499     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14500         return N->getOperand(0);
14501     break;
14502   case PPCISD::SRA:
14503     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14504       if (C->isNullValue() ||   //  0 >>s V -> 0.
14505           C->isAllOnesValue())    // -1 >>s V -> -1.
14506         return N->getOperand(0);
14507     }
14508     break;
14509   case ISD::SIGN_EXTEND:
14510   case ISD::ZERO_EXTEND:
14511   case ISD::ANY_EXTEND:
14512     return DAGCombineExtBoolTrunc(N, DCI);
14513   case ISD::TRUNCATE:
14514     return combineTRUNCATE(N, DCI);
14515   case ISD::SETCC:
14516     if (SDValue CSCC = combineSetCC(N, DCI))
14517       return CSCC;
14518     LLVM_FALLTHROUGH;
14519   case ISD::SELECT_CC:
14520     return DAGCombineTruncBoolExt(N, DCI);
14521   case ISD::SINT_TO_FP:
14522   case ISD::UINT_TO_FP:
14523     return combineFPToIntToFP(N, DCI);
14524   case ISD::VECTOR_SHUFFLE:
14525     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14526       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14527       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14528     }
14529     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14530   case ISD::STORE: {
14531 
14532     EVT Op1VT = N->getOperand(1).getValueType();
14533     unsigned Opcode = N->getOperand(1).getOpcode();
14534 
14535     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14536       SDValue Val= combineStoreFPToInt(N, DCI);
14537       if (Val)
14538         return Val;
14539     }
14540 
14541     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14542       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14543       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14544       if (Val)
14545         return Val;
14546     }
14547 
14548     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14549     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14550         N->getOperand(1).getNode()->hasOneUse() &&
14551         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14552          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14553 
      // STBRX can only handle simple types and it makes no sense to store
      // fewer than two bytes in byte-reversed order.
14556       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14557       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14558         break;
14559 
14560       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14561       // Do an any-extend to 32-bits if this is a half-word input.
14562       if (BSwapOp.getValueType() == MVT::i16)
14563         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14564 
14565       // If the type of BSWAP operand is wider than stored memory width
14566       // it need to be shifted to the right side before STBRX.
14567       if (Op1VT.bitsGT(mVT)) {
14568         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14569         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14570                               DAG.getConstant(Shift, dl, MVT::i32));
14571         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14572         if (Op1VT == MVT::i64)
14573           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14574       }
14575 
14576       SDValue Ops[] = {
14577         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14578       };
14579       return
14580         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14581                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14582                                 cast<StoreSDNode>(N)->getMemOperand());
14583     }
14584 
14585     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14586     // So it can increase the chance of CSE constant construction.
14587     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14588         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
14590       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14591       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14592                                     MemVT.getSizeInBits());
14593       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14594 
14595       // DAG.getTruncStore() can't be used here because it doesn't accept
14596       // the general (base + offset) addressing mode.
14597       // So we use UpdateNodeOperands and setTruncatingStore instead.
14598       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14599                              N->getOperand(3));
14600       cast<StoreSDNode>(N)->setTruncatingStore(true);
14601       return SDValue(N, 0);
14602     }
14603 
14604     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14605     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14606     if (Op1VT.isSimple()) {
14607       MVT StoreVT = Op1VT.getSimpleVT();
14608       if (Subtarget.needsSwapsForVSXMemOps() &&
14609           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14610            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14611         return expandVSXStoreForLE(N, DCI);
14612     }
14613     break;
14614   }
14615   case ISD::LOAD: {
14616     LoadSDNode *LD = cast<LoadSDNode>(N);
14617     EVT VT = LD->getValueType(0);
14618 
14619     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14620     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14621     if (VT.isSimple()) {
14622       MVT LoadVT = VT.getSimpleVT();
14623       if (Subtarget.needsSwapsForVSXMemOps() &&
14624           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14625            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14626         return expandVSXLoadForLE(N, DCI);
14627     }
14628 
14629     // We sometimes end up with a 64-bit integer load, from which we extract
14630     // two single-precision floating-point numbers. This happens with
14631     // std::complex<float>, and other similar structures, because of the way we
14632     // canonicalize structure copies. However, if we lack direct moves,
14633     // then the final bitcasts from the extracted integer values to the
14634     // floating-point numbers turn into store/load pairs. Even with direct moves,
14635     // just loading the two floating-point numbers is likely better.
14636     auto ReplaceTwoFloatLoad = [&]() {
14637       if (VT != MVT::i64)
14638         return false;
14639 
14640       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14641           LD->isVolatile())
14642         return false;
14643 
14644       //  We're looking for a sequence like this:
14645       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14646       //      t16: i64 = srl t13, Constant:i32<32>
14647       //    t17: i32 = truncate t16
14648       //  t18: f32 = bitcast t17
14649       //    t19: i32 = truncate t13
14650       //  t20: f32 = bitcast t19
14651 
14652       if (!LD->hasNUsesOfValue(2, 0))
14653         return false;
14654 
14655       auto UI = LD->use_begin();
14656       while (UI.getUse().getResNo() != 0) ++UI;
14657       SDNode *Trunc = *UI++;
14658       while (UI.getUse().getResNo() != 0) ++UI;
14659       SDNode *RightShift = *UI;
14660       if (Trunc->getOpcode() != ISD::TRUNCATE)
14661         std::swap(Trunc, RightShift);
14662 
14663       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14664           Trunc->getValueType(0) != MVT::i32 ||
14665           !Trunc->hasOneUse())
14666         return false;
14667       if (RightShift->getOpcode() != ISD::SRL ||
14668           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14669           RightShift->getConstantOperandVal(1) != 32 ||
14670           !RightShift->hasOneUse())
14671         return false;
14672 
14673       SDNode *Trunc2 = *RightShift->use_begin();
14674       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14675           Trunc2->getValueType(0) != MVT::i32 ||
14676           !Trunc2->hasOneUse())
14677         return false;
14678 
14679       SDNode *Bitcast = *Trunc->use_begin();
14680       SDNode *Bitcast2 = *Trunc2->use_begin();
14681 
14682       if (Bitcast->getOpcode() != ISD::BITCAST ||
14683           Bitcast->getValueType(0) != MVT::f32)
14684         return false;
14685       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14686           Bitcast2->getValueType(0) != MVT::f32)
14687         return false;
14688 
14689       if (Subtarget.isLittleEndian())
14690         std::swap(Bitcast, Bitcast2);
14691 
14692       // Bitcast has the second float (in memory-layout order) and Bitcast2
14693       // has the first one.
14694 
14695       SDValue BasePtr = LD->getBasePtr();
14696       if (LD->isIndexed()) {
14697         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14698                "Non-pre-inc AM on PPC?");
14699         BasePtr =
14700           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14701                       LD->getOffset());
14702       }
14703 
14704       auto MMOFlags =
14705           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14706       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14707                                       LD->getPointerInfo(), LD->getAlignment(),
14708                                       MMOFlags, LD->getAAInfo());
14709       SDValue AddPtr =
14710         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14711                     BasePtr, DAG.getIntPtrConstant(4, dl));
14712       SDValue FloatLoad2 = DAG.getLoad(
14713           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14714           LD->getPointerInfo().getWithOffset(4),
14715           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14716 
14717       if (LD->isIndexed()) {
14718         // Note that DAGCombine should re-form any pre-increment load(s) from
14719         // what is produced here if that makes sense.
14720         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14721       }
14722 
14723       DCI.CombineTo(Bitcast2, FloatLoad);
14724       DCI.CombineTo(Bitcast, FloatLoad2);
14725 
14726       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14727                                     SDValue(FloatLoad2.getNode(), 1));
14728       return true;
14729     };
14730 
14731     if (ReplaceTwoFloatLoad())
14732       return SDValue(N, 0);
14733 
14734     EVT MemVT = LD->getMemoryVT();
14735     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14736     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14737     if (LD->isUnindexed() && VT.isVector() &&
14738         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14739           // P8 and later hardware should just use LOAD.
14740           !Subtarget.hasP8Vector() &&
14741           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14742            VT == MVT::v4f32))) &&
14743         LD->getAlign() < ABIAlignment) {
14744       // This is a type-legal unaligned Altivec load.
14745       SDValue Chain = LD->getChain();
14746       SDValue Ptr = LD->getBasePtr();
14747       bool isLittleEndian = Subtarget.isLittleEndian();
14748 
14749       // This implements the loading of unaligned vectors as described in
14750       // the venerable Apple Velocity Engine overview. Specifically:
14751       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14752       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14753       //
14754       // The general idea is to expand a sequence of one or more unaligned
14755       // loads into an alignment-based permutation-control instruction (lvsl
14756       // or lvsr), a series of regular vector loads (which always truncate
14757       // their input address to an aligned address), and a series of
14758       // permutations.  The results of these permutations are the requested
14759       // loaded values.  The trick is that the last "extra" load is not taken
14760       // from the address you might suspect (sizeof(vector) bytes after the
14761       // last requested load), but rather sizeof(vector) - 1 bytes after the
14762       // last requested vector. The point of this is to avoid a page fault if
14763       // the base address happened to be aligned. This works because if the
14764       // base address is aligned, then adding less than a full vector length
14765       // will cause the last vector in the sequence to be (re)loaded.
14766       // Otherwise, the next vector will be fetched as you might suspect was
14767       // necessary.
14768 
14769       // We might be able to reuse the permutation generation from
14770       // a different base address offset from this one by an aligned amount.
14771       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14772       // optimization later.
14773       Intrinsic::ID Intr, IntrLD, IntrPerm;
14774       MVT PermCntlTy, PermTy, LDTy;
14775       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14776                             : Intrinsic::ppc_altivec_lvsl;
14777       IntrLD = Intrinsic::ppc_altivec_lvx;
14778       IntrPerm = Intrinsic::ppc_altivec_vperm;
14779       PermCntlTy = MVT::v16i8;
14780       PermTy = MVT::v4i32;
14781       LDTy = MVT::v4i32;
14782 
14783       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14784 
14785       // Create the new MMO for the new base load. It is like the original MMO,
14786       // but represents an area in memory almost twice the vector size centered
14787       // on the original address. If the address is unaligned, we might start
14788       // reading up to (sizeof(vector)-1) bytes below the address of the
14789       // original unaligned load.
14790       MachineFunction &MF = DAG.getMachineFunction();
14791       MachineMemOperand *BaseMMO =
14792         MF.getMachineMemOperand(LD->getMemOperand(),
14793                                 -(long)MemVT.getStoreSize()+1,
14794                                 2*MemVT.getStoreSize()-1);
14795 
14796       // Create the new base load.
14797       SDValue LDXIntID =
14798           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14799       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14800       SDValue BaseLoad =
14801         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14802                                 DAG.getVTList(PermTy, MVT::Other),
14803                                 BaseLoadOps, LDTy, BaseMMO);
14804 
14805       // Note that the value of IncOffset (which is provided to the next
14806       // load's pointer info offset value, and thus used to calculate the
14807       // alignment), and the value of IncValue (which is actually used to
14808       // increment the pointer value) are different! This is because we
14809       // require the next load to appear to be aligned, even though it
14810       // is actually offset from the base pointer by a lesser amount.
14811       int IncOffset = VT.getSizeInBits() / 8;
14812       int IncValue = IncOffset;
14813 
14814       // Walk (both up and down) the chain looking for another load at the real
14815       // (aligned) offset (the alignment of the other load does not matter in
14816       // this case). If found, then do not use the offset reduction trick, as
14817       // that will prevent the loads from being later combined (as they would
14818       // otherwise be duplicates).
14819       if (!findConsecutiveLoad(LD, DAG))
14820         --IncValue;
14821 
14822       SDValue Increment =
14823           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14824       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14825 
14826       MachineMemOperand *ExtraMMO =
14827         MF.getMachineMemOperand(LD->getMemOperand(),
14828                                 1, 2*MemVT.getStoreSize()-1);
14829       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14830       SDValue ExtraLoad =
14831         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14832                                 DAG.getVTList(PermTy, MVT::Other),
14833                                 ExtraLoadOps, LDTy, ExtraMMO);
14834 
14835       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14836         BaseLoad.getValue(1), ExtraLoad.getValue(1));
14837 
14838       // Because vperm has a big-endian bias, we must reverse the order
14839       // of the input vectors and complement the permute control vector
14840       // when generating little endian code.  We have already handled the
14841       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14842       // and ExtraLoad here.
14843       SDValue Perm;
14844       if (isLittleEndian)
14845         Perm = BuildIntrinsicOp(IntrPerm,
14846                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14847       else
14848         Perm = BuildIntrinsicOp(IntrPerm,
14849                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14850 
14851       if (VT != PermTy)
14852         Perm = Subtarget.hasAltivec()
14853                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14854                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14855                                  DAG.getTargetConstant(1, dl, MVT::i64));
14856                                // second argument is 1 because this rounding
14857                                // is always exact.
14858 
14859       // The output of the permutation is our loaded result, the TokenFactor is
14860       // our new chain.
14861       DCI.CombineTo(N, Perm, TF);
14862       return SDValue(N, 0);
14863     }
14864     }
14865     break;
14866     case ISD::INTRINSIC_WO_CHAIN: {
14867       bool isLittleEndian = Subtarget.isLittleEndian();
14868       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14869       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14870                                            : Intrinsic::ppc_altivec_lvsl);
14871       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14872         SDValue Add = N->getOperand(1);
14873 
14874         int Bits = 4 /* 16 byte alignment */;
14875 
14876         if (DAG.MaskedValueIsZero(Add->getOperand(1),
14877                                   APInt::getAllOnesValue(Bits /* alignment */)
14878                                       .zext(Add.getScalarValueSizeInBits()))) {
14879           SDNode *BasePtr = Add->getOperand(0).getNode();
14880           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14881                                     UE = BasePtr->use_end();
14882                UI != UE; ++UI) {
14883             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14884                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14885                     IID) {
14886               // We've found another LVSL/LVSR, and this address is an aligned
14887               // multiple of that one. The results will be the same, so use the
14888               // one we've just found instead.
14889 
14890               return SDValue(*UI, 0);
14891             }
14892           }
14893         }
14894 
14895         if (isa<ConstantSDNode>(Add->getOperand(1))) {
14896           SDNode *BasePtr = Add->getOperand(0).getNode();
14897           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14898                UE = BasePtr->use_end(); UI != UE; ++UI) {
14899             if (UI->getOpcode() == ISD::ADD &&
14900                 isa<ConstantSDNode>(UI->getOperand(1)) &&
14901                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14902                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14903                 (1ULL << Bits) == 0) {
14904               SDNode *OtherAdd = *UI;
14905               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14906                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
14907                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14908                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14909                   return SDValue(*VI, 0);
14910                 }
14911               }
14912             }
14913           }
14914         }
14915       }
14916 
14917       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14918       // Expose the vabsduw/h/b opportunity for down stream
14919       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14920           (IID == Intrinsic::ppc_altivec_vmaxsw ||
14921            IID == Intrinsic::ppc_altivec_vmaxsh ||
14922            IID == Intrinsic::ppc_altivec_vmaxsb)) {
14923         SDValue V1 = N->getOperand(1);
14924         SDValue V2 = N->getOperand(2);
14925         if ((V1.getSimpleValueType() == MVT::v4i32 ||
14926              V1.getSimpleValueType() == MVT::v8i16 ||
14927              V1.getSimpleValueType() == MVT::v16i8) &&
14928             V1.getSimpleValueType() == V2.getSimpleValueType()) {
14929           // (0-a, a)
14930           if (V1.getOpcode() == ISD::SUB &&
14931               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14932               V1.getOperand(1) == V2) {
14933             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14934           }
14935           // (a, 0-a)
14936           if (V2.getOpcode() == ISD::SUB &&
14937               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14938               V2.getOperand(1) == V1) {
14939             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14940           }
14941           // (x-y, y-x)
14942           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14943               V1.getOperand(0) == V2.getOperand(1) &&
14944               V1.getOperand(1) == V2.getOperand(0)) {
14945             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14946           }
14947         }
14948       }
14949     }
14950 
14951     break;
14952   case ISD::INTRINSIC_W_CHAIN:
14953     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14954     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14955     if (Subtarget.needsSwapsForVSXMemOps()) {
14956       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14957       default:
14958         break;
14959       case Intrinsic::ppc_vsx_lxvw4x:
14960       case Intrinsic::ppc_vsx_lxvd2x:
14961         return expandVSXLoadForLE(N, DCI);
14962       }
14963     }
14964     break;
14965   case ISD::INTRINSIC_VOID:
14966     // For little endian, VSX stores require generating xxswapd/stxvd2x.
14967     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14968     if (Subtarget.needsSwapsForVSXMemOps()) {
14969       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14970       default:
14971         break;
14972       case Intrinsic::ppc_vsx_stxvw4x:
14973       case Intrinsic::ppc_vsx_stxvd2x:
14974         return expandVSXStoreForLE(N, DCI);
14975       }
14976     }
14977     break;
14978   case ISD::BSWAP:
14979     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14980     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14981         N->getOperand(0).hasOneUse() &&
14982         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14983          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14984           N->getValueType(0) == MVT::i64))) {
14985       SDValue Load = N->getOperand(0);
14986       LoadSDNode *LD = cast<LoadSDNode>(Load);
14987       // Create the byte-swapping load.
14988       SDValue Ops[] = {
14989         LD->getChain(),    // Chain
14990         LD->getBasePtr(),  // Ptr
14991         DAG.getValueType(N->getValueType(0)) // VT
14992       };
14993       SDValue BSLoad =
14994         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
14995                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14996                                               MVT::i64 : MVT::i32, MVT::Other),
14997                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
14998 
14999       // If this is an i16 load, insert the truncate.
15000       SDValue ResVal = BSLoad;
15001       if (N->getValueType(0) == MVT::i16)
15002         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15003 
15004       // First, combine the bswap away.  This makes the value produced by the
15005       // load dead.
15006       DCI.CombineTo(N, ResVal);
15007 
15008       // Next, combine the load away, we give it a bogus result value but a real
15009       // chain result.  The result value is dead because the bswap is dead.
15010       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15011 
15012       // Return N so it doesn't get rechecked!
15013       return SDValue(N, 0);
15014     }
15015     break;
15016   case PPCISD::VCMP:
15017     // If a VCMPo node already exists with exactly the same operands as this
15018     // node, use its result instead of this node (VCMPo computes both a CR6 and
15019     // a normal output).
15020     //
15021     if (!N->getOperand(0).hasOneUse() &&
15022         !N->getOperand(1).hasOneUse() &&
15023         !N->getOperand(2).hasOneUse()) {
15024 
15025       // Scan all of the users of the LHS, looking for VCMPo's that match.
15026       SDNode *VCMPoNode = nullptr;
15027 
15028       SDNode *LHSN = N->getOperand(0).getNode();
15029       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15030            UI != E; ++UI)
15031         if (UI->getOpcode() == PPCISD::VCMPo &&
15032             UI->getOperand(1) == N->getOperand(1) &&
15033             UI->getOperand(2) == N->getOperand(2) &&
15034             UI->getOperand(0) == N->getOperand(0)) {
15035           VCMPoNode = *UI;
15036           break;
15037         }
15038 
15039       // If there is no VCMPo node, or if the flag value has a single use, don't
15040       // transform this.
15041       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
15042         break;
15043 
15044       // Look at the (necessarily single) use of the flag value.  If it has a
15045       // chain, this transformation is more complex.  Note that multiple things
15046       // could use the value result, which we should ignore.
15047       SDNode *FlagUser = nullptr;
15048       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
15049            FlagUser == nullptr; ++UI) {
15050         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
15051         SDNode *User = *UI;
15052         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15053           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
15054             FlagUser = User;
15055             break;
15056           }
15057         }
15058       }
15059 
15060       // If the user is a MFOCRF instruction, we know this is safe.
15061       // Otherwise we give up for right now.
15062       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15063         return SDValue(VCMPoNode, 0);
15064     }
15065     break;
15066   case ISD::BRCOND: {
15067     SDValue Cond = N->getOperand(1);
15068     SDValue Target = N->getOperand(2);
15069 
15070     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15071         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15072           Intrinsic::loop_decrement) {
15073 
15074       // We now need to make the intrinsic dead (it cannot be instruction
15075       // selected).
15076       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15077       assert(Cond.getNode()->hasOneUse() &&
15078              "Counter decrement has more than one use");
15079 
15080       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15081                          N->getOperand(0), Target);
15082     }
15083   }
15084   break;
15085   case ISD::BR_CC: {
15086     // If this is a branch on an altivec predicate comparison, lower this so
15087     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
15088     // lowering is done pre-legalize, because the legalizer lowers the predicate
15089     // compare down to code that is difficult to reassemble.
15090     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15091     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15092 
15093     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15094     // value. If so, pass-through the AND to get to the intrinsic.
15095     if (LHS.getOpcode() == ISD::AND &&
15096         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15097         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15098           Intrinsic::loop_decrement &&
15099         isa<ConstantSDNode>(LHS.getOperand(1)) &&
15100         !isNullConstant(LHS.getOperand(1)))
15101       LHS = LHS.getOperand(0);
15102 
15103     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15104         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15105           Intrinsic::loop_decrement &&
15106         isa<ConstantSDNode>(RHS)) {
15107       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15108              "Counter decrement comparison is not EQ or NE");
15109 
15110       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15111       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15112                     (CC == ISD::SETNE && !Val);
15113 
15114       // We now need to make the intrinsic dead (it cannot be instruction
15115       // selected).
15116       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15117       assert(LHS.getNode()->hasOneUse() &&
15118              "Counter decrement has more than one use");
15119 
15120       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15121                          N->getOperand(0), N->getOperand(4));
15122     }
15123 
15124     int CompareOpc;
15125     bool isDot;
15126 
15127     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15128         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15129         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15130       assert(isDot && "Can't compare against a vector result!");
15131 
15132       // If this is a comparison against something other than 0/1, then we know
15133       // that the condition is never/always true.
15134       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15135       if (Val != 0 && Val != 1) {
15136         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
15137           return N->getOperand(0);
15138         // Always !=, turn it into an unconditional branch.
15139         return DAG.getNode(ISD::BR, dl, MVT::Other,
15140                            N->getOperand(0), N->getOperand(4));
15141       }
15142 
15143       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15144 
15145       // Create the PPCISD altivec 'dot' comparison node.
15146       SDValue Ops[] = {
15147         LHS.getOperand(2),  // LHS of compare
15148         LHS.getOperand(3),  // RHS of compare
15149         DAG.getConstant(CompareOpc, dl, MVT::i32)
15150       };
15151       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15152       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
15153 
15154       // Unpack the result based on how the target uses it.
15155       PPC::Predicate CompOpc;
15156       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15157       default:  // Can't happen, don't crash on invalid number though.
15158       case 0:   // Branch on the value of the EQ bit of CR6.
15159         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15160         break;
15161       case 1:   // Branch on the inverted value of the EQ bit of CR6.
15162         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15163         break;
15164       case 2:   // Branch on the value of the LT bit of CR6.
15165         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15166         break;
15167       case 3:   // Branch on the inverted value of the LT bit of CR6.
15168         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15169         break;
15170       }
15171 
15172       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15173                          DAG.getConstant(CompOpc, dl, MVT::i32),
15174                          DAG.getRegister(PPC::CR6, MVT::i32),
15175                          N->getOperand(4), CompNode.getValue(1));
15176     }
15177     break;
15178   }
15179   case ISD::BUILD_VECTOR:
15180     return DAGCombineBuildVector(N, DCI);
15181   case ISD::ABS:
15182     return combineABS(N, DCI);
15183   case ISD::VSELECT:
15184     return combineVSelect(N, DCI);
15185   }
15186 
15187   return SDValue();
15188 }
15189 
15190 SDValue
15191 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15192                                  SelectionDAG &DAG,
15193                                  SmallVectorImpl<SDNode *> &Created) const {
15194   // fold (sdiv X, pow2)
15195   EVT VT = N->getValueType(0);
15196   if (VT == MVT::i64 && !Subtarget.isPPC64())
15197     return SDValue();
15198   if ((VT != MVT::i32 && VT != MVT::i64) ||
15199       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15200     return SDValue();
15201 
15202   SDLoc DL(N);
15203   SDValue N0 = N->getOperand(0);
15204 
15205   bool IsNegPow2 = (-Divisor).isPowerOf2();
15206   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15207   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15208 
15209   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15210   Created.push_back(Op.getNode());
15211 
15212   if (IsNegPow2) {
15213     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15214     Created.push_back(Op.getNode());
15215   }
15216 
15217   return Op;
15218 }
15219 
15220 //===----------------------------------------------------------------------===//
15221 // Inline Assembly Support
15222 //===----------------------------------------------------------------------===//
15223 
15224 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15225                                                       KnownBits &Known,
15226                                                       const APInt &DemandedElts,
15227                                                       const SelectionDAG &DAG,
15228                                                       unsigned Depth) const {
15229   Known.resetAll();
15230   switch (Op.getOpcode()) {
15231   default: break;
15232   case PPCISD::LBRX: {
15233     // lhbrx is known to have the top bits cleared out.
15234     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15235       Known.Zero = 0xFFFF0000;
15236     break;
15237   }
15238   case ISD::INTRINSIC_WO_CHAIN: {
15239     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15240     default: break;
15241     case Intrinsic::ppc_altivec_vcmpbfp_p:
15242     case Intrinsic::ppc_altivec_vcmpeqfp_p:
15243     case Intrinsic::ppc_altivec_vcmpequb_p:
15244     case Intrinsic::ppc_altivec_vcmpequh_p:
15245     case Intrinsic::ppc_altivec_vcmpequw_p:
15246     case Intrinsic::ppc_altivec_vcmpequd_p:
15247     case Intrinsic::ppc_altivec_vcmpequq_p:
15248     case Intrinsic::ppc_altivec_vcmpgefp_p:
15249     case Intrinsic::ppc_altivec_vcmpgtfp_p:
15250     case Intrinsic::ppc_altivec_vcmpgtsb_p:
15251     case Intrinsic::ppc_altivec_vcmpgtsh_p:
15252     case Intrinsic::ppc_altivec_vcmpgtsw_p:
15253     case Intrinsic::ppc_altivec_vcmpgtsd_p:
15254     case Intrinsic::ppc_altivec_vcmpgtsq_p:
15255     case Intrinsic::ppc_altivec_vcmpgtub_p:
15256     case Intrinsic::ppc_altivec_vcmpgtuh_p:
15257     case Intrinsic::ppc_altivec_vcmpgtuw_p:
15258     case Intrinsic::ppc_altivec_vcmpgtud_p:
15259     case Intrinsic::ppc_altivec_vcmpgtuq_p:
15260       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
15261       break;
15262     }
15263   }
15264   }
15265 }
15266 
15267 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15268   switch (Subtarget.getCPUDirective()) {
15269   default: break;
15270   case PPC::DIR_970:
15271   case PPC::DIR_PWR4:
15272   case PPC::DIR_PWR5:
15273   case PPC::DIR_PWR5X:
15274   case PPC::DIR_PWR6:
15275   case PPC::DIR_PWR6X:
15276   case PPC::DIR_PWR7:
15277   case PPC::DIR_PWR8:
15278   case PPC::DIR_PWR9:
15279   case PPC::DIR_PWR10:
15280   case PPC::DIR_PWR_FUTURE: {
15281     if (!ML)
15282       break;
15283 
15284     if (!DisableInnermostLoopAlign32) {
15285       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
15286       // so that we can decrease cache misses and branch-prediction misses.
15287       // Actual alignment of the loop will depend on the hotness check and other
15288       // logic in alignBlocks.
15289       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15290         return Align(32);
15291     }
15292 
15293     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15294 
15295     // For small loops (between 5 and 8 instructions), align to a 32-byte
15296     // boundary so that the entire loop fits in one instruction-cache line.
15297     uint64_t LoopSize = 0;
15298     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15299       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15300         LoopSize += TII->getInstSizeInBytes(*J);
15301         if (LoopSize > 32)
15302           break;
15303       }
15304 
15305     if (LoopSize > 16 && LoopSize <= 32)
15306       return Align(32);
15307 
15308     break;
15309   }
15310   }
15311 
15312   return TargetLowering::getPrefLoopAlignment(ML);
15313 }
15314 
15315 /// getConstraintType - Given a constraint, return the type of
15316 /// constraint it is for this target.
15317 PPCTargetLowering::ConstraintType
15318 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15319   if (Constraint.size() == 1) {
15320     switch (Constraint[0]) {
15321     default: break;
15322     case 'b':
15323     case 'r':
15324     case 'f':
15325     case 'd':
15326     case 'v':
15327     case 'y':
15328       return C_RegisterClass;
15329     case 'Z':
15330       // FIXME: While Z does indicate a memory constraint, it specifically
15331       // indicates an r+r address (used in conjunction with the 'y' modifier
15332       // in the replacement string). Currently, we're forcing the base
15333       // register to be r0 in the asm printer (which is interpreted as zero)
15334       // and forming the complete address in the second register. This is
15335       // suboptimal.
15336       return C_Memory;
15337     }
15338   } else if (Constraint == "wc") { // individual CR bits.
15339     return C_RegisterClass;
15340   } else if (Constraint == "wa" || Constraint == "wd" ||
15341              Constraint == "wf" || Constraint == "ws" ||
15342              Constraint == "wi" || Constraint == "ww") {
15343     return C_RegisterClass; // VSX registers.
15344   }
15345   return TargetLowering::getConstraintType(Constraint);
15346 }
15347 
15348 /// Examine constraint type and operand type and determine a weight value.
15349 /// This object must already have been set up with the operand type
15350 /// and the current alternative constraint selected.
15351 TargetLowering::ConstraintWeight
15352 PPCTargetLowering::getSingleConstraintMatchWeight(
15353     AsmOperandInfo &info, const char *constraint) const {
15354   ConstraintWeight weight = CW_Invalid;
15355   Value *CallOperandVal = info.CallOperandVal;
15356     // If we don't have a value, we can't do a match,
15357     // but allow it at the lowest weight.
15358   if (!CallOperandVal)
15359     return CW_Default;
15360   Type *type = CallOperandVal->getType();
15361 
15362   // Look at the constraint type.
15363   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15364     return CW_Register; // an individual CR bit.
15365   else if ((StringRef(constraint) == "wa" ||
15366             StringRef(constraint) == "wd" ||
15367             StringRef(constraint) == "wf") &&
15368            type->isVectorTy())
15369     return CW_Register;
15370   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15371     return CW_Register; // just hold 64-bit integers data.
15372   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15373     return CW_Register;
15374   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15375     return CW_Register;
15376 
15377   switch (*constraint) {
15378   default:
15379     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15380     break;
15381   case 'b':
15382     if (type->isIntegerTy())
15383       weight = CW_Register;
15384     break;
15385   case 'f':
15386     if (type->isFloatTy())
15387       weight = CW_Register;
15388     break;
15389   case 'd':
15390     if (type->isDoubleTy())
15391       weight = CW_Register;
15392     break;
15393   case 'v':
15394     if (type->isVectorTy())
15395       weight = CW_Register;
15396     break;
15397   case 'y':
15398     weight = CW_Register;
15399     break;
15400   case 'Z':
15401     weight = CW_Memory;
15402     break;
15403   }
15404   return weight;
15405 }
15406 
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  // Map an inline-asm register constraint (e.g. "r", "f", "wa", "{vs34}") to
  // a (register, register class) pair. A zero register number paired with a
  // non-null class means "allocate any register of that class".
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        // SPE subtargets keep f32 in GPRs and f64 in SPE registers.
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // Any full VSX register; all the "w*" constraints require VSX.
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    // Scalar FP in a VSX register; f32 needs the P8 single-precision subclass.
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  // If we name a VSX register, we can't defer to the base class because it
  // will not recognize the correct register (their names will be VSL{0-31}
  // and V{0-31} so they won't match). So we match them here.
  // NOTE(review): only positions 1-2 of the string are checked here;
  // presumably Constraint is always brace-wrapped ("{vsNN}") at this point
  // -- confirm against the generic constraint-parsing code.
  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
    int VSNum = atoi(Constraint.data() + 3);
    assert(VSNum >= 0 && VSNum <= 63 &&
           "Attempted to access a vsr out of range");
    // vs0-31 are named via VSL0-31; vs32-63 alias the Altivec V0-31.
    if (VSNum < 32)
      return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
    return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
  }
  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}
15494 
15495 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15496 /// vector.  If it is invalid, don't add anything to Ops.
15497 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15498                                                      std::string &Constraint,
15499                                                      std::vector<SDValue>&Ops,
15500                                                      SelectionDAG &DAG) const {
15501   SDValue Result;
15502 
15503   // Only support length 1 constraints.
15504   if (Constraint.length() > 1) return;
15505 
15506   char Letter = Constraint[0];
15507   switch (Letter) {
15508   default: break;
15509   case 'I':
15510   case 'J':
15511   case 'K':
15512   case 'L':
15513   case 'M':
15514   case 'N':
15515   case 'O':
15516   case 'P': {
15517     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15518     if (!CST) return; // Must be an immediate to match.
15519     SDLoc dl(Op);
15520     int64_t Value = CST->getSExtValue();
15521     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15522                          // numbers are printed as such.
15523     switch (Letter) {
15524     default: llvm_unreachable("Unknown constraint letter!");
15525     case 'I':  // "I" is a signed 16-bit constant.
15526       if (isInt<16>(Value))
15527         Result = DAG.getTargetConstant(Value, dl, TCVT);
15528       break;
15529     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15530       if (isShiftedUInt<16, 16>(Value))
15531         Result = DAG.getTargetConstant(Value, dl, TCVT);
15532       break;
15533     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15534       if (isShiftedInt<16, 16>(Value))
15535         Result = DAG.getTargetConstant(Value, dl, TCVT);
15536       break;
15537     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15538       if (isUInt<16>(Value))
15539         Result = DAG.getTargetConstant(Value, dl, TCVT);
15540       break;
15541     case 'M':  // "M" is a constant that is greater than 31.
15542       if (Value > 31)
15543         Result = DAG.getTargetConstant(Value, dl, TCVT);
15544       break;
15545     case 'N':  // "N" is a positive constant that is an exact power of two.
15546       if (Value > 0 && isPowerOf2_64(Value))
15547         Result = DAG.getTargetConstant(Value, dl, TCVT);
15548       break;
15549     case 'O':  // "O" is the constant zero.
15550       if (Value == 0)
15551         Result = DAG.getTargetConstant(Value, dl, TCVT);
15552       break;
15553     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15554       if (isInt<16>(-Value))
15555         Result = DAG.getTargetConstant(Value, dl, TCVT);
15556       break;
15557     }
15558     break;
15559   }
15560   }
15561 
15562   if (Result.getNode()) {
15563     Ops.push_back(Result);
15564     return;
15565   }
15566 
15567   // Handle standard constraint letters.
15568   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15569 }
15570 
15571 // isLegalAddressingMode - Return true if the addressing mode represented
15572 // by AM is legal for this target, for a load/store of the specified type.
15573 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15574                                               const AddrMode &AM, Type *Ty,
15575                                               unsigned AS,
15576                                               Instruction *I) const {
15577   // Vector type r+i form is supported since power9 as DQ form. We don't check
15578   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15579   // imm form is preferred and the offset can be adjusted to use imm form later
15580   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15581   // max offset to check legal addressing mode, we should be a little aggressive
15582   // to contain other offsets for that LSRUse.
15583   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15584     return false;
15585 
15586   // PPC allows a sign-extended 16-bit immediate field.
15587   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15588     return false;
15589 
15590   // No global is ever allowed as a base.
15591   if (AM.BaseGV)
15592     return false;
15593 
15594   // PPC only support r+r,
15595   switch (AM.Scale) {
15596   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15597     break;
15598   case 1:
15599     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15600       return false;
15601     // Otherwise we have r+r or r+i.
15602     break;
15603   case 2:
15604     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15605       return false;
15606     // Allow 2*r as r+r.
15607     break;
15608   default:
15609     // No other scales are supported.
15610     return false;
15611   }
15612 
15613   return true;
15614 }
15615 
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Lower @llvm.returnaddress(depth); operand 0 is the requested frame depth.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  // The depth argument must be a constant; otherwise use default expansion.
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    // Walk to the requested ancestor frame (LowerFRAMEADDR chases the saved
    // back-chain Depth times), then load the saved LR at its ABI offset.
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}
15650 
15651 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15652                                           SelectionDAG &DAG) const {
15653   SDLoc dl(Op);
15654   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15655 
15656   MachineFunction &MF = DAG.getMachineFunction();
15657   MachineFrameInfo &MFI = MF.getFrameInfo();
15658   MFI.setFrameAddressIsTaken(true);
15659 
15660   EVT PtrVT = getPointerTy(MF.getDataLayout());
15661   bool isPPC64 = PtrVT == MVT::i64;
15662 
15663   // Naked functions never have a frame pointer, and so we use r1. For all
15664   // other functions, this decision must be delayed until during PEI.
15665   unsigned FrameReg;
15666   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15667     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15668   else
15669     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15670 
15671   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15672                                          PtrVT);
15673   while (Depth--)
15674     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15675                             FrameAddr, MachinePointerInfo());
15676   return FrameAddr;
15677 }
15678 
15679 // FIXME? Maybe this could be a TableGen attribute on some registers and
15680 // this table could be generated automatically from RegInfo.
15681 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15682                                               const MachineFunction &MF) const {
15683   bool isPPC64 = Subtarget.isPPC64();
15684 
15685   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15686   if (!is64Bit && VT != LLT::scalar(32))
15687     report_fatal_error("Invalid register global variable type");
15688 
15689   Register Reg = StringSwitch<Register>(RegName)
15690                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15691                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15692                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15693                      .Default(Register());
15694 
15695   if (Reg)
15696     return Reg;
15697   report_fatal_error("Invalid register name global variable");
15698 }
15699 
15700 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15701   // 32-bit SVR4 ABI access everything as got-indirect.
15702   if (Subtarget.is32BitELFABI())
15703     return true;
15704 
15705   // AIX accesses everything indirectly through the TOC, which is similar to
15706   // the GOT.
15707   if (Subtarget.isAIXABI())
15708     return true;
15709 
15710   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15711   // If it is small or large code model, module locals are accessed
15712   // indirectly by loading their address from .toc/.got.
15713   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15714     return true;
15715 
15716   // JumpTable and BlockAddress are accessed as got-indirect.
15717   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15718     return true;
15719 
15720   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15721     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15722 
15723   return false;
15724 }
15725 
15726 bool
15727 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15728   // The PowerPC target isn't yet aware of offsets.
15729   return false;
15730 }
15731 
15732 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15733                                            const CallInst &I,
15734                                            MachineFunction &MF,
15735                                            unsigned Intrinsic) const {
15736   switch (Intrinsic) {
15737   case Intrinsic::ppc_altivec_lvx:
15738   case Intrinsic::ppc_altivec_lvxl:
15739   case Intrinsic::ppc_altivec_lvebx:
15740   case Intrinsic::ppc_altivec_lvehx:
15741   case Intrinsic::ppc_altivec_lvewx:
15742   case Intrinsic::ppc_vsx_lxvd2x:
15743   case Intrinsic::ppc_vsx_lxvw4x: {
15744     EVT VT;
15745     switch (Intrinsic) {
15746     case Intrinsic::ppc_altivec_lvebx:
15747       VT = MVT::i8;
15748       break;
15749     case Intrinsic::ppc_altivec_lvehx:
15750       VT = MVT::i16;
15751       break;
15752     case Intrinsic::ppc_altivec_lvewx:
15753       VT = MVT::i32;
15754       break;
15755     case Intrinsic::ppc_vsx_lxvd2x:
15756       VT = MVT::v2f64;
15757       break;
15758     default:
15759       VT = MVT::v4i32;
15760       break;
15761     }
15762 
15763     Info.opc = ISD::INTRINSIC_W_CHAIN;
15764     Info.memVT = VT;
15765     Info.ptrVal = I.getArgOperand(0);
15766     Info.offset = -VT.getStoreSize()+1;
15767     Info.size = 2*VT.getStoreSize()-1;
15768     Info.align = Align(1);
15769     Info.flags = MachineMemOperand::MOLoad;
15770     return true;
15771   }
15772   case Intrinsic::ppc_altivec_stvx:
15773   case Intrinsic::ppc_altivec_stvxl:
15774   case Intrinsic::ppc_altivec_stvebx:
15775   case Intrinsic::ppc_altivec_stvehx:
15776   case Intrinsic::ppc_altivec_stvewx:
15777   case Intrinsic::ppc_vsx_stxvd2x:
15778   case Intrinsic::ppc_vsx_stxvw4x: {
15779     EVT VT;
15780     switch (Intrinsic) {
15781     case Intrinsic::ppc_altivec_stvebx:
15782       VT = MVT::i8;
15783       break;
15784     case Intrinsic::ppc_altivec_stvehx:
15785       VT = MVT::i16;
15786       break;
15787     case Intrinsic::ppc_altivec_stvewx:
15788       VT = MVT::i32;
15789       break;
15790     case Intrinsic::ppc_vsx_stxvd2x:
15791       VT = MVT::v2f64;
15792       break;
15793     default:
15794       VT = MVT::v4i32;
15795       break;
15796     }
15797 
15798     Info.opc = ISD::INTRINSIC_VOID;
15799     Info.memVT = VT;
15800     Info.ptrVal = I.getArgOperand(1);
15801     Info.offset = -VT.getStoreSize()+1;
15802     Info.size = 2*VT.getStoreSize()-1;
15803     Info.align = Align(1);
15804     Info.flags = MachineMemOperand::MOStore;
15805     return true;
15806   }
15807   default:
15808     break;
15809   }
15810 
15811   return false;
15812 }
15813 
15814 /// It returns EVT::Other if the type should be determined using generic
15815 /// target-independent logic.
15816 EVT PPCTargetLowering::getOptimalMemOpType(
15817     const MemOp &Op, const AttributeList &FuncAttributes) const {
15818   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15819     // We should use Altivec/VSX loads and stores when available. For unaligned
15820     // addresses, unaligned VSX loads are only fast starting with the P8.
15821     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15822         (Op.isAligned(Align(16)) ||
15823          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15824       return MVT::v4i32;
15825   }
15826 
15827   if (Subtarget.isPPC64()) {
15828     return MVT::i64;
15829   }
15830 
15831   return MVT::i32;
15832 }
15833 
15834 /// Returns true if it is beneficial to convert a load of a constant
15835 /// to just the constant itself.
15836 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15837                                                           Type *Ty) const {
15838   assert(Ty->isIntegerTy());
15839 
15840   unsigned BitSize = Ty->getPrimitiveSizeInBits();
15841   return !(BitSize == 0 || BitSize > 64);
15842 }
15843 
15844 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15845   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15846     return false;
15847   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15848   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15849   return NumBits1 == 64 && NumBits2 == 32;
15850 }
15851 
15852 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15853   if (!VT1.isInteger() || !VT2.isInteger())
15854     return false;
15855   unsigned NumBits1 = VT1.getSizeInBits();
15856   unsigned NumBits2 = VT2.getSizeInBits();
15857   return NumBits1 == 64 && NumBits2 == 32;
15858 }
15859 
15860 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
15861   // Generally speaking, zexts are not free, but they are free when they can be
15862   // folded with other operations.
15863   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15864     EVT MemVT = LD->getMemoryVT();
15865     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15866          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15867         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15868          LD->getExtensionType() == ISD::ZEXTLOAD))
15869       return true;
15870   }
15871 
15872   // FIXME: Add other cases...
15873   //  - 32-bit shifts with a zext to i64
15874   //  - zext after ctlz, bswap, etc.
15875   //  - zext after and by a constant mask
15876 
15877   return TargetLowering::isZExtFree(Val, VT2);
15878 }
15879 
15880 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15881   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15882          "invalid fpext types");
15883   // Extending to float128 is not free.
15884   if (DestVT == MVT::f128)
15885     return false;
15886   return true;
15887 }
15888 
15889 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15890   return isInt<16>(Imm) || isUInt<16>(Imm);
15891 }
15892 
15893 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15894   return isInt<16>(Imm) || isUInt<16>(Imm);
15895 }
15896 
15897 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
15898                                                        unsigned,
15899                                                        unsigned,
15900                                                        MachineMemOperand::Flags,
15901                                                        bool *Fast) const {
15902   if (DisablePPCUnaligned)
15903     return false;
15904 
15905   // PowerPC supports unaligned memory access for simple non-vector types.
15906   // Although accessing unaligned addresses is not as efficient as accessing
15907   // aligned addresses, it is generally more efficient than manual expansion,
15908   // and generally only traps for software emulation when crossing page
15909   // boundaries.
15910 
15911   if (!VT.isSimple())
15912     return false;
15913 
15914   if (VT.isFloatingPoint() && !VT.isVector() &&
15915       !Subtarget.allowsUnalignedFPAccess())
15916     return false;
15917 
15918   if (VT.getSimpleVT().isVector()) {
15919     if (Subtarget.hasVSX()) {
15920       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15921           VT != MVT::v4f32 && VT != MVT::v4i32)
15922         return false;
15923     } else {
15924       return false;
15925     }
15926   }
15927 
15928   if (VT == MVT::ppcf128)
15929     return false;
15930 
15931   if (Fast)
15932     *Fast = true;
15933 
15934   return true;
15935 }
15936 
15937 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15938                                                    EVT VT) const {
15939   return isFMAFasterThanFMulAndFAdd(
15940       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15941 }
15942 
15943 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
15944                                                    Type *Ty) const {
15945   switch (Ty->getScalarType()->getTypeID()) {
15946   case Type::FloatTyID:
15947   case Type::DoubleTyID:
15948     return true;
15949   case Type::FP128TyID:
15950     return Subtarget.hasP9Vector();
15951   default:
15952     return false;
15953   }
15954 }
15955 
// FIXME: add more patterns which are not profitable to hoist.
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
  // Returning false keeps I next to its single user; we only do that when
  // separating the pair would break a pattern later passes/ISel rely on.
  if (!I->hasOneUse())
    return true;

  Instruction *User = I->user_back();
  assert(User && "A single use instruction with no uses.");

  switch (I->getOpcode()) {
  case Instruction::FMul: {
    // Don't break FMA, PowerPC prefers FMA.
    if (User->getOpcode() != Instruction::FSub &&
        User->getOpcode() != Instruction::FAdd)
      return true;

    const TargetOptions &Options = getTargetMachine().Options;
    const Function *F = I->getFunction();
    const DataLayout &DL = F->getParent()->getDataLayout();
    Type *Ty = User->getOperand(0)->getType();

    // Keep the fmul adjacent to its fadd/fsub user only when fusion could
    // actually happen: FMA is faster, legal for the type, and FP contraction
    // is permitted.
    return !(
        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
        isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
  }
  case Instruction::Load: {
    // Don't break "store (load float*)" pattern, this pattern will be combined
    // to "store (load int32)" in later InstCombine pass. See function
    // combineLoadToOperationType. On PowerPC, loading a float point takes more
    // cycles than loading a 32 bit integer.
    LoadInst *LI = cast<LoadInst>(I);
    // For the loads that combineLoadToOperationType does nothing, like
    // ordered load, it should be profitable to hoist them.
    // For swifterror load, it can only be used for pointer to pointer type, so
    // later type check should get rid of this case.
    if (!LI->isUnordered())
      return true;

    if (User->getOpcode() != Instruction::Store)
      return true;

    if (I->getType()->getTypeID() != Type::FloatTyID)
      return true;

    // Unordered float load feeding a store: keep the pair together.
    return false;
  }
  default:
    return true;
  }
  return true;
}
16007 
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  // The list is zero-terminated, as the callers expect.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}
16020 
16021 Register PPCTargetLowering::getExceptionPointerRegister(
16022     const Constant *PersonalityFn) const {
16023   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16024 }
16025 
16026 Register PPCTargetLowering::getExceptionSelectorRegister(
16027     const Constant *PersonalityFn) const {
16028   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16029 }
16030 
16031 bool
16032 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16033                      EVT VT , unsigned DefinedValues) const {
16034   if (VT == MVT::v2i64)
16035     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16036 
16037   if (Subtarget.hasVSX())
16038     return true;
16039 
16040   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16041 }
16042 
16043 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16044   if (DisableILPPref || Subtarget.enableMachineScheduler())
16045     return TargetLowering::getSchedulingPreference(N);
16046 
16047   return Sched::ILP;
16048 }
16049 
// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  // Delegate to the PPC FastISel factory.
  return PPC::createFastISel(FuncInfo, LibInfo);
}
16056 
16057 // 'Inverted' means the FMA opcode after negating one multiplicand.
16058 // For example, (fma -a b c) = (fnmsub a b c)
16059 static unsigned invertFMAOpcode(unsigned Opc) {
16060   switch (Opc) {
16061   default:
16062     llvm_unreachable("Invalid FMA opcode for PowerPC!");
16063   case ISD::FMA:
16064     return PPCISD::FNMSUB;
16065   case PPCISD::FNMSUB:
16066     return ISD::FMA;
16067   }
16068 }
16069 
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  // Target hook: produce a cheaper form of (fneg Op), or SDValue() if none.
  // PPC adds FNMSUB-specific folds, then defers to the generic logic.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Opc) {
  case PPCISD::FNMSUB:
    // Only rewrite single-use nodes of a legal type; otherwise we could
    // duplicate work or produce illegal operations.
    if (!Op.hasOneUse() || !isTypeLegal(VT))
      break;

    const TargetOptions &Options = getTargetMachine().Options;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);
    SDLoc Loc(Op);

    // Every fold below requires a negated addend, so try that first.
    NegatibleCost N2Cost = NegatibleCost::Expensive;
    SDValue NegN2 =
        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);

    if (!NegN2)
      return SDValue();

    // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
    // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
    // These transformations may change sign of zeroes. For example,
    // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
    if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
      // Try and choose the cheaper one to negate.
      NegatibleCost N0Cost = NegatibleCost::Expensive;
      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
                                           N0Cost, Depth + 1);

      NegatibleCost N1Cost = NegatibleCost::Expensive;
      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
                                           N1Cost, Depth + 1);

      // Prefer negating N0 when it is no more expensive than negating N1.
      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(N0Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(N1Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
    }

    break;
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
16134 
16135 // Override to enable LOAD_STACK_GUARD lowering on Linux.
16136 bool PPCTargetLowering::useLoadStackGuardNode() const {
16137   if (!Subtarget.isTargetLinux())
16138     return TargetLowering::useLoadStackGuardNode();
16139   return true;
16140 }
16141 
16142 // Override to disable global variable loading on Linux.
16143 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16144   if (!Subtarget.isTargetLinux())
16145     return TargetLowering::insertSSPDeclarations(M);
16146 }
16147 
16148 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16149                                      bool ForCodeSize) const {
16150   if (!VT.isSimple() || !Subtarget.hasVSX())
16151     return false;
16152 
16153   switch(VT.getSimpleVT().SimpleTy) {
16154   default:
16155     // For FP types that are currently not supported by PPC backend, return
16156     // false. Examples: f16, f80.
16157     return false;
16158   case MVT::f32:
16159   case MVT::f64:
16160     if (Subtarget.hasPrefixInstrs()) {
16161       // With prefixed instructions, we can materialize anything that can be
16162       // represented with a 32-bit immediate, not just positive zero.
16163       APFloat APFloatOfImm = Imm;
16164       return convertToNonDenormSingle(APFloatOfImm);
16165     }
16166     LLVM_FALLTHROUGH;
16167   case MVT::ppcf128:
16168     return Imm.isPosZero();
16169   }
16170 }
16171 
16172 // For vector shift operation op, fold
16173 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16174 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16175                                   SelectionDAG &DAG) {
16176   SDValue N0 = N->getOperand(0);
16177   SDValue N1 = N->getOperand(1);
16178   EVT VT = N0.getValueType();
16179   unsigned OpSizeInBits = VT.getScalarSizeInBits();
16180   unsigned Opcode = N->getOpcode();
16181   unsigned TargetOpcode;
16182 
16183   switch (Opcode) {
16184   default:
16185     llvm_unreachable("Unexpected shift operation");
16186   case ISD::SHL:
16187     TargetOpcode = PPCISD::SHL;
16188     break;
16189   case ISD::SRL:
16190     TargetOpcode = PPCISD::SRL;
16191     break;
16192   case ISD::SRA:
16193     TargetOpcode = PPCISD::SRA;
16194     break;
16195   }
16196 
16197   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16198       N1->getOpcode() == ISD::AND)
16199     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16200       if (Mask->getZExtValue() == OpSizeInBits - 1)
16201         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16202 
16203   return SDValue();
16204 }
16205 
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  // First try the common vector-shift fold that strips a redundant
  // modulo mask from the shift amount.
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  // Otherwise fold (shl (sext i32 x), c) -> EXTSWSLI x, c on ISA 3.0, which
  // combines the sign extension and the shift.
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 ||
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}
16235 
16236 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16237   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16238     return Value;
16239 
16240   return SDValue();
16241 }
16242 
16243 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16244   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16245     return Value;
16246 
16247   return SDValue();
16248 }
16249 
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  // All values involved are i64, so this only applies on 64-bit targets.
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Match (zext (setcc Z, C, cc)) where Z is i64 and -C fits a 16-bit
  // signed immediate. One-use checks avoid duplicating the setcc/zext when
  // they have other consumers.
  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  // i64 result plus a glue output so the carry feeds the final ADDE.
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant Should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    // When C == 0 the addi is a no-op; use Z directly.
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
    }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    // When C == 0 the addi is a no-op; use Z directly.
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
    }
  }

  // Unhandled condition code: no combine.
  return SDValue();
}
16336 
16337 // Transform
16338 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16339 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16340 // In this case both C1 and C2 must be known constants.
16341 // C1+C2 must fit into a 34 bit signed integer.
16342 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16343                                           const PPCSubtarget &Subtarget) {
16344   if (!Subtarget.isUsingPCRelativeCalls())
16345     return SDValue();
16346 
16347   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16348   // If we find that node try to cast the Global Address and the Constant.
16349   SDValue LHS = N->getOperand(0);
16350   SDValue RHS = N->getOperand(1);
16351 
16352   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16353     std::swap(LHS, RHS);
16354 
16355   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16356     return SDValue();
16357 
16358   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16359   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16360   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16361 
16362   // Check that both casts succeeded.
16363   if (!GSDN || !ConstNode)
16364     return SDValue();
16365 
16366   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16367   SDLoc DL(GSDN);
16368 
16369   // The signed int offset needs to fit in 34 bits.
16370   if (!isInt<34>(NewOffset))
16371     return SDValue();
16372 
16373   // The new global address is a copy of the old global address except
16374   // that it has the updated Offset.
16375   SDValue GA =
16376       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16377                                  NewOffset, GSDN->getTargetFlags());
16378   SDValue MatPCRel =
16379       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16380   return MatPCRel;
16381 }
16382 
16383 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16384   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16385     return Value;
16386 
16387   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16388     return Value;
16389 
16390   return SDValue();
16391 }
16392 
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situtation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
    EVT VT = N->getValueType(0);
    // VABSD is only formed for these vector result types.
    if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
      return SDValue();
    SDValue Sub = Op0.getOperand(0);
    if (Sub.getOpcode() == ISD::SUB) {
      SDValue SubOp0 = Sub.getOperand(0);
      SDValue SubOp1 = Sub.getOperand(1);
      // Zero-extended inputs guarantee the subtraction cannot wrap as a
      // signed value in the wider type, so abs(sub) is an absolute
      // difference of the narrow operands.
      if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
          (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
        return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
                               SubOp1.getOperand(0),
                               DCI.DAG.getTargetConstant(0, dl, MVT::i32));
      }
    }
  }

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  // Which v2i64 element holds the low 64 bits depends on endianness.
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    // View the f128 as v2i64 and extract the desired half directly,
    // avoiding a store/reload of the full value.
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}
16462 
// Combine (mul x, C) where C (or its absolute value) is 2^N +/- 1 into a
// shift plus an add/sub, when the subtarget's instruction timings make the
// expansion profitable.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Only handle constant (or splat-of-constant) multipliers.
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  // Decide per-CPU whether the shift+add/sub expansion beats the multiply.
  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      // scalar        4       1      1
      // vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
    case PPC::DIR_PWR10:
    case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations are showed as a table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  // Work with |C| and re-apply the sign at the end.
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    // Negate by subtracting from zero.
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    // The sign is absorbed by swapping the subtraction operands.
    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);

  } else {
    return SDValue();
  }
}
16547 
16548 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16549 // in combiner since we need to check SD flags and other subtarget features.
16550 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16551                                           DAGCombinerInfo &DCI) const {
16552   SDValue N0 = N->getOperand(0);
16553   SDValue N1 = N->getOperand(1);
16554   SDValue N2 = N->getOperand(2);
16555   SDNodeFlags Flags = N->getFlags();
16556   EVT VT = N->getValueType(0);
16557   SelectionDAG &DAG = DCI.DAG;
16558   const TargetOptions &Options = getTargetMachine().Options;
16559   unsigned Opc = N->getOpcode();
16560   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16561   bool LegalOps = !DCI.isBeforeLegalizeOps();
16562   SDLoc Loc(N);
16563 
16564   if (!isOperationLegal(ISD::FMA, VT))
16565     return SDValue();
16566 
16567   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16568   // since (fnmsub a b c)=-0 while c-ab=+0.
16569   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16570     return SDValue();
16571 
16572   // (fma (fneg a) b c) => (fnmsub a b c)
16573   // (fnmsub (fneg a) b c) => (fma a b c)
16574   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16575     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16576 
16577   // (fma a (fneg b) c) => (fnmsub a b c)
16578   // (fnmsub a (fneg b) c) => (fma a b c)
16579   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16580     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16581 
16582   return SDValue();
16583 }
16584 
16585 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16586   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16587   if (!Subtarget.is64BitELFABI())
16588     return false;
16589 
16590   // If not a tail call then no need to proceed.
16591   if (!CI->isTailCall())
16592     return false;
16593 
16594   // If sibling calls have been disabled and tail-calls aren't guaranteed
16595   // there is no reason to duplicate.
16596   auto &TM = getTargetMachine();
16597   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16598     return false;
16599 
16600   // Can't tail call a function called indirectly, or if it has variadic args.
16601   const Function *Callee = CI->getCalledFunction();
16602   if (!Callee || Callee->isVarArg())
16603     return false;
16604 
16605   // Make sure the callee and caller calling conventions are eligible for tco.
16606   const Function *Caller = CI->getParent()->getParent();
16607   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16608                                            CI->getCallingConv()))
16609       return false;
16610 
16611   // If the function is local then we have a good chance at tail-calling it
16612   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16613 }
16614 
16615 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16616   if (!Subtarget.hasVSX())
16617     return false;
16618   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16619     return true;
16620   return VT == MVT::f32 || VT == MVT::f64 ||
16621     VT == MVT::v4f32 || VT == MVT::v2f64;
16622 }
16623 
16624 bool PPCTargetLowering::
16625 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16626   const Value *Mask = AndI.getOperand(1);
16627   // If the mask is suitable for andi. or andis. we should sink the and.
16628   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16629     // Can't handle constants wider than 64-bits.
16630     if (CI->getBitWidth() > 64)
16631       return false;
16632     int64_t ConstVal = CI->getZExtValue();
16633     return isUInt<16>(ConstVal) ||
16634       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16635   }
16636 
16637   // For non-constant masks, we can always use the record-form and.
16638   return true;
16639 }
16640 
// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  // VABSD is only formed for these vector element widths.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      // Third operand 0: operands are used as-is (no sign-bit flip needed).
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      // Third operand 1: signals that the sub operands need the xvnegsp
      // adjustment before the unsigned absolute difference.
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
16683 
// For type v4i32/v8ii16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  // The select must choose between two subtractions gated by a comparison.
  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    // For the "less" forms, swap the subs so the match below is uniform.
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}
16746