1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 // TODO - Remove this option if soft fp128 has been fully supported .
125 static cl::opt<bool>
126     EnableSoftFP128("enable-soft-fp128",
127                     cl::desc("temp option to enable soft fp128"), cl::Hidden);
128 
129 STATISTIC(NumTailCalls, "Number of tail calls");
130 STATISTIC(NumSiblingCalls, "Number of sibling calls");
131 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133 
134 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135 
136 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137 
138 // FIXME: Remove this once the bug has been fixed!
139 extern cl::opt<bool> ANDIGlueBug;
140 
141 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142                                      const PPCSubtarget &STI)
143     : TargetLowering(TM), Subtarget(STI) {
144   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145   // arguments are at least 4/8 bytes aligned.
146   bool isPPC64 = Subtarget.isPPC64();
147   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148 
149   // Set up the register classes.
150   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151   if (!useSoftFloat()) {
152     if (hasSPE()) {
153       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154       addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
155     } else {
156       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
157       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
158     }
159   }
160 
161   // Match BITREVERSE to customized fast code sequence in the td file.
162   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
163   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
164 
165   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
166   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
167 
168   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
169   for (MVT VT : MVT::integer_valuetypes()) {
170     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
171     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
172   }
173 
174   if (Subtarget.isISA3_0()) {
175     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
176     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
177     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
178     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
179   } else {
180     // No extending loads from f16 or HW conversions back and forth.
181     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
182     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
183     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
184     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
185     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
186     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
187     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
188     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189   }
190 
191   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
192 
193   // PowerPC has pre-inc load and store's.
194   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
195   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
196   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
197   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
198   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
199   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
200   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
201   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
202   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
203   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
204   if (!Subtarget.hasSPE()) {
205     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
206     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
207     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
208     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
209   }
210 
211   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
212   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
213   for (MVT VT : ScalarIntVTs) {
214     setOperationAction(ISD::ADDC, VT, Legal);
215     setOperationAction(ISD::ADDE, VT, Legal);
216     setOperationAction(ISD::SUBC, VT, Legal);
217     setOperationAction(ISD::SUBE, VT, Legal);
218   }
219 
220   if (Subtarget.useCRBits()) {
221     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
222 
223     if (isPPC64 || Subtarget.hasFPCVT()) {
224       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
225       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
226                         isPPC64 ? MVT::i64 : MVT::i32);
227       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
228       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
229                         isPPC64 ? MVT::i64 : MVT::i32);
230 
231       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
232       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
233                          isPPC64 ? MVT::i64 : MVT::i32);
234       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
235       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
236                         isPPC64 ? MVT::i64 : MVT::i32);
237     } else {
238       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
239       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
240       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
241       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
242     }
243 
244     // PowerPC does not support direct load/store of condition registers.
245     setOperationAction(ISD::LOAD, MVT::i1, Custom);
246     setOperationAction(ISD::STORE, MVT::i1, Custom);
247 
248     // FIXME: Remove this once the ANDI glue bug is fixed:
249     if (ANDIGlueBug)
250       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
251 
252     for (MVT VT : MVT::integer_valuetypes()) {
253       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
254       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
255       setTruncStoreAction(VT, MVT::i1, Expand);
256     }
257 
258     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
259   }
260 
261   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
262   // PPC (the libcall is not available).
263   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
264   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
265   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
266   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
267 
268   // We do not currently implement these libm ops for PowerPC.
269   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
270   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
271   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
272   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
273   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
274   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
275 
276   // PowerPC has no SREM/UREM instructions unless we are on P9
277   // On P9 we may use a hardware instruction to compute the remainder.
278   // When the result of both the remainder and the division is required it is
279   // more efficient to compute the remainder from the result of the division
280   // rather than use the remainder instruction. The instructions are legalized
281   // directly because the DivRemPairsPass performs the transformation at the IR
282   // level.
283   if (Subtarget.isISA3_0()) {
284     setOperationAction(ISD::SREM, MVT::i32, Legal);
285     setOperationAction(ISD::UREM, MVT::i32, Legal);
286     setOperationAction(ISD::SREM, MVT::i64, Legal);
287     setOperationAction(ISD::UREM, MVT::i64, Legal);
288   } else {
289     setOperationAction(ISD::SREM, MVT::i32, Expand);
290     setOperationAction(ISD::UREM, MVT::i32, Expand);
291     setOperationAction(ISD::SREM, MVT::i64, Expand);
292     setOperationAction(ISD::UREM, MVT::i64, Expand);
293   }
294 
295   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
296   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
297   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
298   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
299   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
300   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
301   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
302   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
304 
305   // Handle constrained floating-point operations of scalar.
306   // TODO: Handle SPE specific operation.
307   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
308   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
309   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
310   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
311   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
312   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
313 
314   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
315   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
316   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
317   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
318   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
319   if (Subtarget.hasVSX()) {
320     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
321     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
322   }
323 
324   if (Subtarget.hasFSQRT()) {
325     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
326     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
327   }
328 
329   if (Subtarget.hasFPRND()) {
330     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
331     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
332     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
333     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
334 
335     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
336     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
337     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
338     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
339   }
340 
341   // We don't support sin/cos/sqrt/fmod/pow
342   setOperationAction(ISD::FSIN , MVT::f64, Expand);
343   setOperationAction(ISD::FCOS , MVT::f64, Expand);
344   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
345   setOperationAction(ISD::FREM , MVT::f64, Expand);
346   setOperationAction(ISD::FPOW , MVT::f64, Expand);
347   setOperationAction(ISD::FSIN , MVT::f32, Expand);
348   setOperationAction(ISD::FCOS , MVT::f32, Expand);
349   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
350   setOperationAction(ISD::FREM , MVT::f32, Expand);
351   setOperationAction(ISD::FPOW , MVT::f32, Expand);
352   if (Subtarget.hasSPE()) {
353     setOperationAction(ISD::FMA  , MVT::f64, Expand);
354     setOperationAction(ISD::FMA  , MVT::f32, Expand);
355   } else {
356     setOperationAction(ISD::FMA  , MVT::f64, Legal);
357     setOperationAction(ISD::FMA  , MVT::f32, Legal);
358   }
359 
360   if (Subtarget.hasSPE())
361     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
362 
363   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
364 
365   // If we're enabling GP optimizations, use hardware square root
366   if (!Subtarget.hasFSQRT() &&
367       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
368         Subtarget.hasFRE()))
369     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
370 
371   if (!Subtarget.hasFSQRT() &&
372       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
373         Subtarget.hasFRES()))
374     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
375 
376   if (Subtarget.hasFCPSGN()) {
377     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
378     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
379   } else {
380     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
381     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
382   }
383 
384   if (Subtarget.hasFPRND()) {
385     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
386     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
387     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
388     setOperationAction(ISD::FROUND, MVT::f64, Legal);
389 
390     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
391     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
392     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
393     setOperationAction(ISD::FROUND, MVT::f32, Legal);
394   }
395 
396   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
397   // to speed up scalar BSWAP64.
398   // CTPOP or CTTZ were introduced in P8/P9 respectively
399   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
400   if (Subtarget.hasP9Vector())
401     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
402   else
403     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
404   if (Subtarget.isISA3_0()) {
405     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
406     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
407   } else {
408     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
409     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
410   }
411 
412   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
413     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
414     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
415   } else {
416     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
417     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
418   }
419 
420   // PowerPC does not have ROTR
421   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
422   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
423 
424   if (!Subtarget.useCRBits()) {
425     // PowerPC does not have Select
426     setOperationAction(ISD::SELECT, MVT::i32, Expand);
427     setOperationAction(ISD::SELECT, MVT::i64, Expand);
428     setOperationAction(ISD::SELECT, MVT::f32, Expand);
429     setOperationAction(ISD::SELECT, MVT::f64, Expand);
430   }
431 
432   // PowerPC wants to turn select_cc of FP into fsel when possible.
433   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
434   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
435 
436   // PowerPC wants to optimize integer setcc a bit
437   if (!Subtarget.useCRBits())
438     setOperationAction(ISD::SETCC, MVT::i32, Custom);
439 
440   if (Subtarget.hasFPU()) {
441     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
442     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
443     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
444 
445     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
446     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
447     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
448   }
449 
450   // PowerPC does not have BRCOND which requires SetCC
451   if (!Subtarget.useCRBits())
452     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
453 
454   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
455 
456   if (Subtarget.hasSPE()) {
457     // SPE has built-in conversions
458     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
459     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
460     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
461     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
462     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
463     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
464   } else {
465     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
466     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
467     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
468 
469     // PowerPC does not have [U|S]INT_TO_FP
470     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
471     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
472     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
473     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
474   }
475 
476   if (Subtarget.hasDirectMove() && isPPC64) {
477     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
478     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
479     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
480     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
481     if (TM.Options.UnsafeFPMath) {
482       setOperationAction(ISD::LRINT, MVT::f64, Legal);
483       setOperationAction(ISD::LRINT, MVT::f32, Legal);
484       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
485       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
486       setOperationAction(ISD::LROUND, MVT::f64, Legal);
487       setOperationAction(ISD::LROUND, MVT::f32, Legal);
488       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
489       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
490     }
491   } else {
492     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
493     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
494     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
495     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
496   }
497 
498   // We cannot sextinreg(i1).  Expand to shifts.
499   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
500 
501   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
502   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
503   // support continuation, user-level threading, and etc.. As a result, no
504   // other SjLj exception interfaces are implemented and please don't build
505   // your own exception handling based on them.
506   // LLVM/Clang supports zero-cost DWARF exception handling.
507   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
508   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
509 
510   // We want to legalize GlobalAddress and ConstantPool nodes into the
511   // appropriate instructions to materialize the address.
512   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
513   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
514   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
515   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
516   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
517   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
518   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
519   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
520   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
521   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
522 
523   // TRAP is legal.
524   setOperationAction(ISD::TRAP, MVT::Other, Legal);
525 
526   // TRAMPOLINE is custom lowered.
527   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
528   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
529 
530   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
531   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
532 
533   if (Subtarget.is64BitELFABI()) {
534     // VAARG always uses double-word chunks, so promote anything smaller.
535     setOperationAction(ISD::VAARG, MVT::i1, Promote);
536     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
537     setOperationAction(ISD::VAARG, MVT::i8, Promote);
538     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
539     setOperationAction(ISD::VAARG, MVT::i16, Promote);
540     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
541     setOperationAction(ISD::VAARG, MVT::i32, Promote);
542     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
543     setOperationAction(ISD::VAARG, MVT::Other, Expand);
544   } else if (Subtarget.is32BitELFABI()) {
545     // VAARG is custom lowered with the 32-bit SVR4 ABI.
546     setOperationAction(ISD::VAARG, MVT::Other, Custom);
547     setOperationAction(ISD::VAARG, MVT::i64, Custom);
548   } else
549     setOperationAction(ISD::VAARG, MVT::Other, Expand);
550 
551   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
552   if (Subtarget.is32BitELFABI())
553     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
554   else
555     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
556 
557   // Use the default implementation.
558   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
559   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
560   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
561   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
562   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
563   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
564   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
565   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
566   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
567 
568   // We want to custom lower some of our intrinsics.
569   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
570 
571   // To handle counter-based loop conditions.
572   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
573 
574   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
575   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
576   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
577   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
578 
579   // Comparisons that require checking two conditions.
580   if (Subtarget.hasSPE()) {
581     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
582     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
583     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
584     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
585   }
586   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
587   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
588   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
589   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
590   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
591   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
592   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
593   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
594   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
595   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
596   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
597   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
598 
599   if (Subtarget.has64BitSupport()) {
600     // They also have instructions for converting between i64 and fp.
601     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
602     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
603     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
604     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
605     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
606     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
607     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
608     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
609     // This is just the low 32 bits of a (signed) fp->i64 conversion.
610     // We cannot do this with Promote because i64 is not a legal type.
611     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
612     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
613 
614     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
615       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
616       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
617     }
618   } else {
619     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
620     if (Subtarget.hasSPE()) {
621       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
622       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
623     } else {
624       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
625       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
626     }
627   }
628 
629   // With the instructions enabled under FPCVT, we can do everything.
630   if (Subtarget.hasFPCVT()) {
631     if (Subtarget.has64BitSupport()) {
632       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
633       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
634       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
635       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
636       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
637       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
638       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
639       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
640     }
641 
642     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
643     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
644     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
645     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
646     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
647     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
648     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
649     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
650   }
651 
652   if (Subtarget.use64BitRegs()) {
653     // 64-bit PowerPC implementations can support i64 types directly
654     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
655     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
656     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
657     // 64-bit PowerPC wants to expand i128 shifts itself.
658     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
659     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
660     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
661   } else {
662     // 32-bit PowerPC wants to expand i64 shifts itself.
663     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
664     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
665     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
666   }
667 
668   // PowerPC has better expansions for funnel shifts than the generic
669   // TargetLowering::expandFunnelShift.
670   if (Subtarget.has64BitSupport()) {
671     setOperationAction(ISD::FSHL, MVT::i64, Custom);
672     setOperationAction(ISD::FSHR, MVT::i64, Custom);
673   }
674   setOperationAction(ISD::FSHL, MVT::i32, Custom);
675   setOperationAction(ISD::FSHR, MVT::i32, Custom);
676 
677   if (Subtarget.hasVSX()) {
678     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
679     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
680     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
681     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
682   }
683 
684   if (Subtarget.hasAltivec()) {
685     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
686       setOperationAction(ISD::SADDSAT, VT, Legal);
687       setOperationAction(ISD::SSUBSAT, VT, Legal);
688       setOperationAction(ISD::UADDSAT, VT, Legal);
689       setOperationAction(ISD::USUBSAT, VT, Legal);
690     }
691     // First set operation action for all vector types to expand. Then we
692     // will selectively turn on ones that can be effectively codegen'd.
693     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
694       // add/sub are legal for all supported vector VT's.
695       setOperationAction(ISD::ADD, VT, Legal);
696       setOperationAction(ISD::SUB, VT, Legal);
697 
698       // For v2i64, these are only valid with P8Vector. This is corrected after
699       // the loop.
700       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
701         setOperationAction(ISD::SMAX, VT, Legal);
702         setOperationAction(ISD::SMIN, VT, Legal);
703         setOperationAction(ISD::UMAX, VT, Legal);
704         setOperationAction(ISD::UMIN, VT, Legal);
705       }
706       else {
707         setOperationAction(ISD::SMAX, VT, Expand);
708         setOperationAction(ISD::SMIN, VT, Expand);
709         setOperationAction(ISD::UMAX, VT, Expand);
710         setOperationAction(ISD::UMIN, VT, Expand);
711       }
712 
713       if (Subtarget.hasVSX()) {
714         setOperationAction(ISD::FMAXNUM, VT, Legal);
715         setOperationAction(ISD::FMINNUM, VT, Legal);
716       }
717 
718       // Vector instructions introduced in P8
719       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
720         setOperationAction(ISD::CTPOP, VT, Legal);
721         setOperationAction(ISD::CTLZ, VT, Legal);
722       }
723       else {
724         setOperationAction(ISD::CTPOP, VT, Expand);
725         setOperationAction(ISD::CTLZ, VT, Expand);
726       }
727 
728       // Vector instructions introduced in P9
729       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
730         setOperationAction(ISD::CTTZ, VT, Legal);
731       else
732         setOperationAction(ISD::CTTZ, VT, Expand);
733 
734       // We promote all shuffles to v16i8.
735       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
736       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
737 
738       // We promote all non-typed operations to v4i32.
739       setOperationAction(ISD::AND   , VT, Promote);
740       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
741       setOperationAction(ISD::OR    , VT, Promote);
742       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
743       setOperationAction(ISD::XOR   , VT, Promote);
744       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
745       setOperationAction(ISD::LOAD  , VT, Promote);
746       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
747       setOperationAction(ISD::SELECT, VT, Promote);
748       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
749       setOperationAction(ISD::VSELECT, VT, Legal);
750       setOperationAction(ISD::SELECT_CC, VT, Promote);
751       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
752       setOperationAction(ISD::STORE, VT, Promote);
753       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
754 
755       // No other operations are legal.
756       setOperationAction(ISD::MUL , VT, Expand);
757       setOperationAction(ISD::SDIV, VT, Expand);
758       setOperationAction(ISD::SREM, VT, Expand);
759       setOperationAction(ISD::UDIV, VT, Expand);
760       setOperationAction(ISD::UREM, VT, Expand);
761       setOperationAction(ISD::FDIV, VT, Expand);
762       setOperationAction(ISD::FREM, VT, Expand);
763       setOperationAction(ISD::FNEG, VT, Expand);
764       setOperationAction(ISD::FSQRT, VT, Expand);
765       setOperationAction(ISD::FLOG, VT, Expand);
766       setOperationAction(ISD::FLOG10, VT, Expand);
767       setOperationAction(ISD::FLOG2, VT, Expand);
768       setOperationAction(ISD::FEXP, VT, Expand);
769       setOperationAction(ISD::FEXP2, VT, Expand);
770       setOperationAction(ISD::FSIN, VT, Expand);
771       setOperationAction(ISD::FCOS, VT, Expand);
772       setOperationAction(ISD::FABS, VT, Expand);
773       setOperationAction(ISD::FFLOOR, VT, Expand);
774       setOperationAction(ISD::FCEIL,  VT, Expand);
775       setOperationAction(ISD::FTRUNC, VT, Expand);
776       setOperationAction(ISD::FRINT,  VT, Expand);
777       setOperationAction(ISD::FNEARBYINT, VT, Expand);
778       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
779       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
780       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
781       setOperationAction(ISD::MULHU, VT, Expand);
782       setOperationAction(ISD::MULHS, VT, Expand);
783       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
784       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
785       setOperationAction(ISD::UDIVREM, VT, Expand);
786       setOperationAction(ISD::SDIVREM, VT, Expand);
787       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
788       setOperationAction(ISD::FPOW, VT, Expand);
789       setOperationAction(ISD::BSWAP, VT, Expand);
790       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
791       setOperationAction(ISD::ROTL, VT, Expand);
792       setOperationAction(ISD::ROTR, VT, Expand);
793 
794       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
795         setTruncStoreAction(VT, InnerVT, Expand);
796         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
797         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
798         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
799       }
800     }
801     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
802     if (!Subtarget.hasP8Vector()) {
803       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
804       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
805       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
806       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
807     }
808 
809     for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
810       setOperationAction(ISD::ABS, VT, Custom);
811 
812     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
813     // with merges, splats, etc.
814     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
815 
816     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
817     // are cheap, so handle them before they get expanded to scalar.
818     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
819     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
820     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
821     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
822     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
823 
824     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
825     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
826     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
827     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
828     setOperationAction(ISD::SELECT, MVT::v4i32,
829                        Subtarget.useCRBits() ? Legal : Expand);
830     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
831     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
832     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
833     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
834     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
835     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
836     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
837     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
838     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
839     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
840     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
841     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
842     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
843 
844     // Without hasP8Altivec set, v2i64 SMAX isn't available.
845     // But ABS custom lowering requires SMAX support.
846     if (!Subtarget.hasP8Altivec())
847       setOperationAction(ISD::ABS, MVT::v2i64, Expand);
848 
849     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
850     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
851     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
852     if (Subtarget.hasAltivec())
853       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
854         setOperationAction(ISD::ROTL, VT, Legal);
855     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
856     if (Subtarget.hasP8Altivec())
857       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
858 
859     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
860     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
861     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
862     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
863 
864     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
865     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
866 
867     if (Subtarget.hasVSX()) {
868       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
869       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
870     }
871 
872     if (Subtarget.hasP8Altivec())
873       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
874     else
875       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
876 
877     if (Subtarget.isISA3_1()) {
878       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
879       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
880       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
881       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
882       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
883       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
884       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
885       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
886       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
887       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
888       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
889       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
890       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
891       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
892       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
893       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
894       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
895       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
896     }
897 
898     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
899     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
900 
901     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
902     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
903 
904     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
905     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
906     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
907     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
908 
909     // Altivec does not contain unordered floating-point compare instructions
910     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
911     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
912     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
913     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
914 
915     if (Subtarget.hasVSX()) {
916       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
917       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
918       if (Subtarget.hasP8Vector()) {
919         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
920         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
921       }
922       if (Subtarget.hasDirectMove() && isPPC64) {
923         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
924         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
925         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
926         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
927         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
928         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
929         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
930         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
931       }
932       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
933 
934       // The nearbyint variants are not allowed to raise the inexact exception
935       // so we can only code-gen them with unsafe math.
936       if (TM.Options.UnsafeFPMath) {
937         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
938         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
939       }
940 
941       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
942       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
943       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
944       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
945       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
946       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
947       setOperationAction(ISD::FROUND, MVT::f64, Legal);
948       setOperationAction(ISD::FRINT, MVT::f64, Legal);
949 
950       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
951       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
952       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
953       setOperationAction(ISD::FROUND, MVT::f32, Legal);
954       setOperationAction(ISD::FRINT, MVT::f32, Legal);
955 
956       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
957       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
958 
959       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
960       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
961 
962       // Share the Altivec comparison restrictions.
963       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
964       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
965       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
966       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
967 
968       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
969       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
970 
971       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
972 
973       if (Subtarget.hasP8Vector())
974         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
975 
976       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
977 
978       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
979       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
980       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
981 
982       if (Subtarget.hasP8Altivec()) {
983         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
984         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
985         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
986 
987         // 128 bit shifts can be accomplished via 3 instructions for SHL and
988         // SRL, but not for SRA because of the instructions available:
989         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
990         // doing
991         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
992         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
993         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
994 
995         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
996       }
997       else {
998         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
999         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1000         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1001 
1002         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1003 
1004         // VSX v2i64 only supports non-arithmetic operations.
1005         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1006         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1007       }
1008 
1009       if (Subtarget.isISA3_1())
1010         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1011       else
1012         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1013 
1014       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1015       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1016       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1017       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1018 
1019       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1020 
1021       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1022       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1023       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1024       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1025       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1026       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1027       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1028       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1029 
1030       // Custom handling for partial vectors of integers converted to
1031       // floating point. We already have optimal handling for v2i32 through
1032       // the DAG combine, so those aren't necessary.
1033       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1034       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1035       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1036       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1037       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1038       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1039       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1040       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1041       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1042       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1043       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1044       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1045       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1046       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1047       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1048       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1049 
1050       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1051       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1052       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1053       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1054       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1055       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1056 
1057       if (Subtarget.hasDirectMove())
1058         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1059       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1060 
1061       // Handle constrained floating-point operations of vector.
1062       // The predictor is `hasVSX` because altivec instruction has
1063       // no exception but VSX vector instruction has.
1064       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1065       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1066       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1067       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1068       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1069       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1070       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1071       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1072       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1073       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1074       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1075       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1076       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1077 
1078       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1079       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1080       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1081       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1082       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1083       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1084       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1085       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1086       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1087       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1088       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1089       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1090       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1091 
1092       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1093     }
1094 
1095     if (Subtarget.hasP8Altivec()) {
1096       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1097       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1098     }
1099 
1100     if (Subtarget.hasP9Vector()) {
1101       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1102       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1103 
1104       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1105       // SRL, but not for SRA because of the instructions available:
1106       // VS{RL} and VS{RL}O.
1107       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1108       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1109       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1110 
1111       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1112       setOperationAction(ISD::FADD, MVT::f128, Legal);
1113       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1114       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1115       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1116       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1117       // No extending loads to f128 on PPC.
1118       for (MVT FPT : MVT::fp_valuetypes())
1119         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1120       setOperationAction(ISD::FMA, MVT::f128, Legal);
1121       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1122       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1123       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1124       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1125       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1126       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1127 
1128       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1129       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1130       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1131       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1132       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1133       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1134 
1135       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1136       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1137       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1138       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1139       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1140       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1141       // No implementation for these ops for PowerPC.
1142       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1143       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1144       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1145       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1146       setOperationAction(ISD::FREM, MVT::f128, Expand);
1147 
1148       // Handle constrained floating-point operations of fp128
1149       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1150       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1151       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1152       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1153       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1154       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1155       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1156       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1157       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1158       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1159       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1160       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1161       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1162       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1163       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1164       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1165       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1166       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1167       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1168       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1169     } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
1170       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1171 
1172       for (MVT FPT : MVT::fp_valuetypes())
1173         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1174 
1175       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1176       setOperationAction(ISD::STORE, MVT::f128, Promote);
1177 
1178       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1179       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1180 
1181       setOperationAction(ISD::FADD, MVT::f128, Expand);
1182       setOperationAction(ISD::FSUB, MVT::f128, Expand);
1183       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1184       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1185       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1186       setOperationAction(ISD::FABS, MVT::f128, Expand);
1187       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1188       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1189       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1190       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1191       setOperationAction(ISD::FREM, MVT::f128, Expand);
1192       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1193       setOperationAction(ISD::FMA, MVT::f128, Expand);
1194       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1195     }
1196 
1197     if (Subtarget.hasP9Altivec()) {
1198       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1199       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1200 
1201       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1202       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1203       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1204       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1205       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1206       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1207       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1208     }
1209   }
1210 
1211   if (Subtarget.pairedVectorMemops()) {
1212     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1213     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1214     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1215   }
1216   if (Subtarget.hasMMA()) {
1217     addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1218     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1219     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1220     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1221   }
1222 
1223   if (Subtarget.has64BitSupport())
1224     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1225 
1226   if (Subtarget.isISA3_1())
1227     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1228 
1229   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1230 
1231   if (!isPPC64) {
1232     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1233     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1234   }
1235 
1236   setBooleanContents(ZeroOrOneBooleanContent);
1237 
1238   if (Subtarget.hasAltivec()) {
1239     // Altivec instructions set fields to all zeros or all ones.
1240     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1241   }
1242 
1243   if (!isPPC64) {
1244     // These libcalls are not available in 32-bit.
1245     setLibcallName(RTLIB::SHL_I128, nullptr);
1246     setLibcallName(RTLIB::SRL_I128, nullptr);
1247     setLibcallName(RTLIB::SRA_I128, nullptr);
1248   }
1249 
1250   if (!isPPC64)
1251     setMaxAtomicSizeInBitsSupported(32);
1252 
1253   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1254 
1255   // We have target-specific dag combine patterns for the following nodes:
1256   setTargetDAGCombine(ISD::ADD);
1257   setTargetDAGCombine(ISD::SHL);
1258   setTargetDAGCombine(ISD::SRA);
1259   setTargetDAGCombine(ISD::SRL);
1260   setTargetDAGCombine(ISD::MUL);
1261   setTargetDAGCombine(ISD::FMA);
1262   setTargetDAGCombine(ISD::SINT_TO_FP);
1263   setTargetDAGCombine(ISD::BUILD_VECTOR);
1264   if (Subtarget.hasFPCVT())
1265     setTargetDAGCombine(ISD::UINT_TO_FP);
1266   setTargetDAGCombine(ISD::LOAD);
1267   setTargetDAGCombine(ISD::STORE);
1268   setTargetDAGCombine(ISD::BR_CC);
1269   if (Subtarget.useCRBits())
1270     setTargetDAGCombine(ISD::BRCOND);
1271   setTargetDAGCombine(ISD::BSWAP);
1272   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1273   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1274   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1275 
1276   setTargetDAGCombine(ISD::SIGN_EXTEND);
1277   setTargetDAGCombine(ISD::ZERO_EXTEND);
1278   setTargetDAGCombine(ISD::ANY_EXTEND);
1279 
1280   setTargetDAGCombine(ISD::TRUNCATE);
1281   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1282 
1283 
1284   if (Subtarget.useCRBits()) {
1285     setTargetDAGCombine(ISD::TRUNCATE);
1286     setTargetDAGCombine(ISD::SETCC);
1287     setTargetDAGCombine(ISD::SELECT_CC);
1288   }
1289 
1290   if (Subtarget.hasP9Altivec()) {
1291     setTargetDAGCombine(ISD::ABS);
1292     setTargetDAGCombine(ISD::VSELECT);
1293   }
1294 
1295   setLibcallName(RTLIB::LOG_F128, "logf128");
1296   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1297   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1298   setLibcallName(RTLIB::EXP_F128, "expf128");
1299   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1300   setLibcallName(RTLIB::SIN_F128, "sinf128");
1301   setLibcallName(RTLIB::COS_F128, "cosf128");
1302   setLibcallName(RTLIB::POW_F128, "powf128");
1303   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1304   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1305   setLibcallName(RTLIB::POWI_F128, "__powikf2");
1306   setLibcallName(RTLIB::REM_F128, "fmodf128");
1307 
1308   // With 32 condition bits, we don't need to sink (and duplicate) compares
1309   // aggressively in CodeGenPrep.
1310   if (Subtarget.useCRBits()) {
1311     setHasMultipleConditionRegisters();
1312     setJumpIsExpensive();
1313   }
1314 
1315   setMinFunctionAlignment(Align(4));
1316 
1317   switch (Subtarget.getCPUDirective()) {
1318   default: break;
1319   case PPC::DIR_970:
1320   case PPC::DIR_A2:
1321   case PPC::DIR_E500:
1322   case PPC::DIR_E500mc:
1323   case PPC::DIR_E5500:
1324   case PPC::DIR_PWR4:
1325   case PPC::DIR_PWR5:
1326   case PPC::DIR_PWR5X:
1327   case PPC::DIR_PWR6:
1328   case PPC::DIR_PWR6X:
1329   case PPC::DIR_PWR7:
1330   case PPC::DIR_PWR8:
1331   case PPC::DIR_PWR9:
1332   case PPC::DIR_PWR10:
1333   case PPC::DIR_PWR_FUTURE:
1334     setPrefLoopAlignment(Align(16));
1335     setPrefFunctionAlignment(Align(16));
1336     break;
1337   }
1338 
1339   if (Subtarget.enableMachineScheduler())
1340     setSchedulingPreference(Sched::Source);
1341   else
1342     setSchedulingPreference(Sched::Hybrid);
1343 
1344   computeRegisterProperties(STI.getRegisterInfo());
1345 
1346   // The Freescale cores do better with aggressive inlining of memcpy and
1347   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1348   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1349       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1350     MaxStoresPerMemset = 32;
1351     MaxStoresPerMemsetOptSize = 16;
1352     MaxStoresPerMemcpy = 32;
1353     MaxStoresPerMemcpyOptSize = 8;
1354     MaxStoresPerMemmove = 32;
1355     MaxStoresPerMemmoveOptSize = 8;
1356   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1357     // The A2 also benefits from (very) aggressive inlining of memcpy and
1358     // friends. The overhead of a the function call, even when warm, can be
1359     // over one hundred cycles.
1360     MaxStoresPerMemset = 128;
1361     MaxStoresPerMemcpy = 128;
1362     MaxStoresPerMemmove = 128;
1363     MaxLoadsPerMemcmp = 128;
1364   } else {
1365     MaxLoadsPerMemcmp = 8;
1366     MaxLoadsPerMemcmpOptSize = 4;
1367   }
1368 
1369   IsStrictFPEnabled = true;
1370 
1371   // Let the subtarget (CPU) decide if a predictable select is more expensive
1372   // than the corresponding branch. This information is used in CGP to decide
1373   // when to convert selects into branches.
1374   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1375 }
1376 
1377 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1378 /// the desired ByVal argument alignment.
1379 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1380   if (MaxAlign == MaxMaxAlign)
1381     return;
1382   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1383     if (MaxMaxAlign >= 32 &&
1384         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1385       MaxAlign = Align(32);
1386     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1387              MaxAlign < 16)
1388       MaxAlign = Align(16);
1389   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1390     Align EltAlign;
1391     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1392     if (EltAlign > MaxAlign)
1393       MaxAlign = EltAlign;
1394   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1395     for (auto *EltTy : STy->elements()) {
1396       Align EltAlign;
1397       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1398       if (EltAlign > MaxAlign)
1399         MaxAlign = EltAlign;
1400       if (MaxAlign == MaxMaxAlign)
1401         break;
1402     }
1403   }
1404 }
1405 
1406 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1407 /// function arguments in the caller parameter area.
1408 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1409                                                   const DataLayout &DL) const {
1410   // 16byte and wider vectors are passed on 16byte boundary.
1411   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1412   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1413   if (Subtarget.hasAltivec())
1414     getMaxByValAlign(Ty, Alignment, Align(16));
1415   return Alignment.value();
1416 }
1417 
1418 bool PPCTargetLowering::useSoftFloat() const {
1419   return Subtarget.useSoftFloat();
1420 }
1421 
1422 bool PPCTargetLowering::hasSPE() const {
1423   return Subtarget.hasSPE();
1424 }
1425 
1426 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1427   return VT.isScalarInteger();
1428 }
1429 
1430 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1431   switch ((PPCISD::NodeType)Opcode) {
1432   case PPCISD::FIRST_NUMBER:    break;
1433   case PPCISD::FSEL:            return "PPCISD::FSEL";
1434   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1435   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1436   case PPCISD::FCFID:           return "PPCISD::FCFID";
1437   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1438   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1439   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1440   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1441   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1442   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1443   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1444   case PPCISD::FP_TO_UINT_IN_VSR:
1445                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1446   case PPCISD::FP_TO_SINT_IN_VSR:
1447                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1448   case PPCISD::FRE:             return "PPCISD::FRE";
1449   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1450   case PPCISD::FTSQRT:
1451     return "PPCISD::FTSQRT";
1452   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1453   case PPCISD::VPERM:           return "PPCISD::VPERM";
1454   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1455   case PPCISD::XXSPLTI_SP_TO_DP:
1456     return "PPCISD::XXSPLTI_SP_TO_DP";
1457   case PPCISD::XXSPLTI32DX:
1458     return "PPCISD::XXSPLTI32DX";
1459   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1460   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1461   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1462   case PPCISD::CMPB:            return "PPCISD::CMPB";
1463   case PPCISD::Hi:              return "PPCISD::Hi";
1464   case PPCISD::Lo:              return "PPCISD::Lo";
1465   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1466   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1467   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1468   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1469   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1470   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1471   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1472   case PPCISD::SRL:             return "PPCISD::SRL";
1473   case PPCISD::SRA:             return "PPCISD::SRA";
1474   case PPCISD::SHL:             return "PPCISD::SHL";
1475   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1476   case PPCISD::CALL:            return "PPCISD::CALL";
1477   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1478   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1479   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1480   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1481   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1482   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1483   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1484   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1485   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1486   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1487   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1488   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1489   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1490   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1491   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1492   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1493     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1494   case PPCISD::ANDI_rec_1_EQ_BIT:
1495     return "PPCISD::ANDI_rec_1_EQ_BIT";
1496   case PPCISD::ANDI_rec_1_GT_BIT:
1497     return "PPCISD::ANDI_rec_1_GT_BIT";
1498   case PPCISD::VCMP:            return "PPCISD::VCMP";
1499   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1500   case PPCISD::LBRX:            return "PPCISD::LBRX";
1501   case PPCISD::STBRX:           return "PPCISD::STBRX";
1502   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1503   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1504   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1505   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1506   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1507   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1508   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1509   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1510   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1511   case PPCISD::ST_VSR_SCAL_INT:
1512                                 return "PPCISD::ST_VSR_SCAL_INT";
1513   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1514   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1515   case PPCISD::BDZ:             return "PPCISD::BDZ";
1516   case PPCISD::MFFS:            return "PPCISD::MFFS";
1517   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1518   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1519   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1520   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1521   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1522   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1523   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1524   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1525   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1526   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1527   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1528   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1529   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1530   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1531   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1532   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1533   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1534   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1535   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1536   case PPCISD::PADDI_DTPREL:
1537     return "PPCISD::PADDI_DTPREL";
1538   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1539   case PPCISD::SC:              return "PPCISD::SC";
1540   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1541   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1542   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1543   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1544   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1545   case PPCISD::VABSD:           return "PPCISD::VABSD";
1546   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1547   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1548   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1549   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1550   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1551   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1552   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1553   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1554     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1555   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1556     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1557   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1558   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1559   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1560   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1561   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1562   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1563   case PPCISD::STRICT_FADDRTZ:
1564     return "PPCISD::STRICT_FADDRTZ";
1565   case PPCISD::STRICT_FCTIDZ:
1566     return "PPCISD::STRICT_FCTIDZ";
1567   case PPCISD::STRICT_FCTIWZ:
1568     return "PPCISD::STRICT_FCTIWZ";
1569   case PPCISD::STRICT_FCTIDUZ:
1570     return "PPCISD::STRICT_FCTIDUZ";
1571   case PPCISD::STRICT_FCTIWUZ:
1572     return "PPCISD::STRICT_FCTIWUZ";
1573   case PPCISD::STRICT_FCFID:
1574     return "PPCISD::STRICT_FCFID";
1575   case PPCISD::STRICT_FCFIDU:
1576     return "PPCISD::STRICT_FCFIDU";
1577   case PPCISD::STRICT_FCFIDS:
1578     return "PPCISD::STRICT_FCFIDS";
1579   case PPCISD::STRICT_FCFIDUS:
1580     return "PPCISD::STRICT_FCFIDUS";
1581   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1582   }
1583   return nullptr;
1584 }
1585 
1586 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1587                                           EVT VT) const {
1588   if (!VT.isVector())
1589     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1590 
1591   return VT.changeVectorElementTypeToInteger();
1592 }
1593 
1594 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1595   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1596   return true;
1597 }
1598 
1599 //===----------------------------------------------------------------------===//
1600 // Node matching predicates, for use by the tblgen matching code.
1601 //===----------------------------------------------------------------------===//
1602 
1603 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1604 static bool isFloatingPointZero(SDValue Op) {
1605   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1606     return CFP->getValueAPF().isZero();
1607   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1608     // Maybe this has already been legalized into the constant pool?
1609     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1610       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1611         return CFP->getValueAPF().isZero();
1612   }
1613   return false;
1614 }
1615 
1616 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1617 /// true if Op is undef or if it matches the specified value.
1618 static bool isConstantOrUndef(int Op, int Val) {
1619   return Op < 0 || Op == Val;
1620 }
1621 
1622 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1623 /// VPKUHUM instruction.
1624 /// The ShuffleKind distinguishes between big-endian operations with
1625 /// two different inputs (0), either-endian operations with two identical
1626 /// inputs (1), and little-endian operations with two different inputs (2).
1627 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1628 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1629                                SelectionDAG &DAG) {
1630   bool IsLE = DAG.getDataLayout().isLittleEndian();
1631   if (ShuffleKind == 0) {
1632     if (IsLE)
1633       return false;
1634     for (unsigned i = 0; i != 16; ++i)
1635       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1636         return false;
1637   } else if (ShuffleKind == 2) {
1638     if (!IsLE)
1639       return false;
1640     for (unsigned i = 0; i != 16; ++i)
1641       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1642         return false;
1643   } else if (ShuffleKind == 1) {
1644     unsigned j = IsLE ? 0 : 1;
1645     for (unsigned i = 0; i != 8; ++i)
1646       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1647           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1648         return false;
1649   }
1650   return true;
1651 }
1652 
1653 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1654 /// VPKUWUM instruction.
1655 /// The ShuffleKind distinguishes between big-endian operations with
1656 /// two different inputs (0), either-endian operations with two identical
1657 /// inputs (1), and little-endian operations with two different inputs (2).
1658 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1659 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1660                                SelectionDAG &DAG) {
1661   bool IsLE = DAG.getDataLayout().isLittleEndian();
1662   if (ShuffleKind == 0) {
1663     if (IsLE)
1664       return false;
1665     for (unsigned i = 0; i != 16; i += 2)
1666       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1667           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1668         return false;
1669   } else if (ShuffleKind == 2) {
1670     if (!IsLE)
1671       return false;
1672     for (unsigned i = 0; i != 16; i += 2)
1673       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1674           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1675         return false;
1676   } else if (ShuffleKind == 1) {
1677     unsigned j = IsLE ? 0 : 2;
1678     for (unsigned i = 0; i != 8; i += 2)
1679       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1680           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1681           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1682           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1683         return false;
1684   }
1685   return true;
1686 }
1687 
1688 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1689 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1690 /// current subtarget.
1691 ///
1692 /// The ShuffleKind distinguishes between big-endian operations with
1693 /// two different inputs (0), either-endian operations with two identical
1694 /// inputs (1), and little-endian operations with two different inputs (2).
1695 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1696 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1697                                SelectionDAG &DAG) {
1698   const PPCSubtarget& Subtarget =
1699       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1700   if (!Subtarget.hasP8Vector())
1701     return false;
1702 
1703   bool IsLE = DAG.getDataLayout().isLittleEndian();
1704   if (ShuffleKind == 0) {
1705     if (IsLE)
1706       return false;
1707     for (unsigned i = 0; i != 16; i += 4)
1708       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1709           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1710           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1711           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1712         return false;
1713   } else if (ShuffleKind == 2) {
1714     if (!IsLE)
1715       return false;
1716     for (unsigned i = 0; i != 16; i += 4)
1717       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1718           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1719           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1720           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1721         return false;
1722   } else if (ShuffleKind == 1) {
1723     unsigned j = IsLE ? 0 : 4;
1724     for (unsigned i = 0; i != 8; i += 4)
1725       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1726           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1727           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1728           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1729           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1730           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1731           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1732           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1733         return false;
1734   }
1735   return true;
1736 }
1737 
1738 /// isVMerge - Common function, used to match vmrg* shuffles.
1739 ///
1740 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1741                      unsigned LHSStart, unsigned RHSStart) {
1742   if (N->getValueType(0) != MVT::v16i8)
1743     return false;
1744   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1745          "Unsupported merge size!");
1746 
1747   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1748     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1749       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1750                              LHSStart+j+i*UnitSize) ||
1751           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1752                              RHSStart+j+i*UnitSize))
1753         return false;
1754     }
1755   return true;
1756 }
1757 
1758 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1759 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1760 /// The ShuffleKind distinguishes between big-endian merges with two
1761 /// different inputs (0), either-endian merges with two identical inputs (1),
1762 /// and little-endian merges with two different inputs (2).  For the latter,
1763 /// the input operands are swapped (see PPCInstrAltivec.td).
1764 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1765                              unsigned ShuffleKind, SelectionDAG &DAG) {
1766   if (DAG.getDataLayout().isLittleEndian()) {
1767     if (ShuffleKind == 1) // unary
1768       return isVMerge(N, UnitSize, 0, 0);
1769     else if (ShuffleKind == 2) // swapped
1770       return isVMerge(N, UnitSize, 0, 16);
1771     else
1772       return false;
1773   } else {
1774     if (ShuffleKind == 1) // unary
1775       return isVMerge(N, UnitSize, 8, 8);
1776     else if (ShuffleKind == 0) // normal
1777       return isVMerge(N, UnitSize, 8, 24);
1778     else
1779       return false;
1780   }
1781 }
1782 
1783 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1784 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1785 /// The ShuffleKind distinguishes between big-endian merges with two
1786 /// different inputs (0), either-endian merges with two identical inputs (1),
1787 /// and little-endian merges with two different inputs (2).  For the latter,
1788 /// the input operands are swapped (see PPCInstrAltivec.td).
1789 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1790                              unsigned ShuffleKind, SelectionDAG &DAG) {
1791   if (DAG.getDataLayout().isLittleEndian()) {
1792     if (ShuffleKind == 1) // unary
1793       return isVMerge(N, UnitSize, 8, 8);
1794     else if (ShuffleKind == 2) // swapped
1795       return isVMerge(N, UnitSize, 8, 24);
1796     else
1797       return false;
1798   } else {
1799     if (ShuffleKind == 1) // unary
1800       return isVMerge(N, UnitSize, 0, 0);
1801     else if (ShuffleKind == 0) // normal
1802       return isVMerge(N, UnitSize, 0, 16);
1803     else
1804       return false;
1805   }
1806 }
1807 
1808 /**
1809  * Common function used to match vmrgew and vmrgow shuffles
1810  *
1811  * The indexOffset determines whether to look for even or odd words in
1812  * the shuffle mask. This is based on the of the endianness of the target
1813  * machine.
1814  *   - Little Endian:
1815  *     - Use offset of 0 to check for odd elements
1816  *     - Use offset of 4 to check for even elements
1817  *   - Big Endian:
1818  *     - Use offset of 0 to check for even elements
1819  *     - Use offset of 4 to check for odd elements
1820  * A detailed description of the vector element ordering for little endian and
1821  * big endian can be found at
1822  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1823  * Targeting your applications - what little endian and big endian IBM XL C/C++
1824  * compiler differences mean to you
1825  *
1826  * The mask to the shuffle vector instruction specifies the indices of the
1827  * elements from the two input vectors to place in the result. The elements are
1828  * numbered in array-access order, starting with the first vector. These vectors
1829  * are always of type v16i8, thus each vector will contain 16 elements of size
1830  * 8. More info on the shuffle vector can be found in the
1831  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1832  * Language Reference.
1833  *
1834  * The RHSStartValue indicates whether the same input vectors are used (unary)
1835  * or two different input vectors are used, based on the following:
1836  *   - If the instruction uses the same vector for both inputs, the range of the
1837  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1838  *     be 0.
1839  *   - If the instruction has two different vectors then the range of the
1840  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1841  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1842  *     to 31 specify elements in the second vector).
1843  *
1844  * \param[in] N The shuffle vector SD Node to analyze
1845  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1846  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1847  * vector to the shuffle_vector instruction
1848  * \return true iff this shuffle vector represents an even or odd word merge
1849  */
1850 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1851                      unsigned RHSStartValue) {
1852   if (N->getValueType(0) != MVT::v16i8)
1853     return false;
1854 
1855   for (unsigned i = 0; i < 2; ++i)
1856     for (unsigned j = 0; j < 4; ++j)
1857       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1858                              i*RHSStartValue+j+IndexOffset) ||
1859           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1860                              i*RHSStartValue+j+IndexOffset+8))
1861         return false;
1862   return true;
1863 }
1864 
1865 /**
1866  * Determine if the specified shuffle mask is suitable for the vmrgew or
1867  * vmrgow instructions.
1868  *
1869  * \param[in] N The shuffle vector SD Node to analyze
1870  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1871  * \param[in] ShuffleKind Identify the type of merge:
1872  *   - 0 = big-endian merge with two different inputs;
1873  *   - 1 = either-endian merge with two identical inputs;
1874  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1875  *     little-endian merges).
1876  * \param[in] DAG The current SelectionDAG
1877  * \return true iff this shuffle mask
1878  */
1879 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1880                               unsigned ShuffleKind, SelectionDAG &DAG) {
1881   if (DAG.getDataLayout().isLittleEndian()) {
1882     unsigned indexOffset = CheckEven ? 4 : 0;
1883     if (ShuffleKind == 1) // Unary
1884       return isVMerge(N, indexOffset, 0);
1885     else if (ShuffleKind == 2) // swapped
1886       return isVMerge(N, indexOffset, 16);
1887     else
1888       return false;
1889   }
1890   else {
1891     unsigned indexOffset = CheckEven ? 0 : 4;
1892     if (ShuffleKind == 1) // Unary
1893       return isVMerge(N, indexOffset, 0);
1894     else if (ShuffleKind == 0) // Normal
1895       return isVMerge(N, indexOffset, 16);
1896     else
1897       return false;
1898   }
1899   return false;
1900 }
1901 
1902 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1903 /// amount, otherwise return -1.
1904 /// The ShuffleKind distinguishes between big-endian operations with two
1905 /// different inputs (0), either-endian operations with two identical inputs
1906 /// (1), and little-endian operations with two different inputs (2).  For the
1907 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1908 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1909                              SelectionDAG &DAG) {
1910   if (N->getValueType(0) != MVT::v16i8)
1911     return -1;
1912 
1913   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1914 
1915   // Find the first non-undef value in the shuffle mask.
1916   unsigned i;
1917   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1918     /*search*/;
1919 
1920   if (i == 16) return -1;  // all undef.
1921 
1922   // Otherwise, check to see if the rest of the elements are consecutively
1923   // numbered from this value.
1924   unsigned ShiftAmt = SVOp->getMaskElt(i);
1925   if (ShiftAmt < i) return -1;
1926 
1927   ShiftAmt -= i;
1928   bool isLE = DAG.getDataLayout().isLittleEndian();
1929 
1930   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1931     // Check the rest of the elements to see if they are consecutive.
1932     for (++i; i != 16; ++i)
1933       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1934         return -1;
1935   } else if (ShuffleKind == 1) {
1936     // Check the rest of the elements to see if they are consecutive.
1937     for (++i; i != 16; ++i)
1938       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1939         return -1;
1940   } else
1941     return -1;
1942 
1943   if (isLE)
1944     ShiftAmt = 16 - ShiftAmt;
1945 
1946   return ShiftAmt;
1947 }
1948 
1949 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1950 /// specifies a splat of a single element that is suitable for input to
1951 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1952 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1953   assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1954          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1955 
1956   // The consecutive indices need to specify an element, not part of two
1957   // different elements.  So abandon ship early if this isn't the case.
1958   if (N->getMaskElt(0) % EltSize != 0)
1959     return false;
1960 
1961   // This is a splat operation if each element of the permute is the same, and
1962   // if the value doesn't reference the second vector.
1963   unsigned ElementBase = N->getMaskElt(0);
1964 
1965   // FIXME: Handle UNDEF elements too!
1966   if (ElementBase >= 16)
1967     return false;
1968 
1969   // Check that the indices are consecutive, in the case of a multi-byte element
1970   // splatted with a v16i8 mask.
1971   for (unsigned i = 1; i != EltSize; ++i)
1972     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1973       return false;
1974 
1975   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1976     if (N->getMaskElt(i) < 0) continue;
1977     for (unsigned j = 0; j != EltSize; ++j)
1978       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1979         return false;
1980   }
1981   return true;
1982 }
1983 
1984 /// Check that the mask is shuffling N byte elements. Within each N byte
1985 /// element of the mask, the indices could be either in increasing or
1986 /// decreasing order as long as they are consecutive.
1987 /// \param[in] N the shuffle vector SD Node to analyze
1988 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1989 /// Word/DoubleWord/QuadWord).
1990 /// \param[in] StepLen the delta indices number among the N byte element, if
1991 /// the mask is in increasing/decreasing order then it is 1/-1.
1992 /// \return true iff the mask is shuffling N byte elements.
1993 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
1994                                    int StepLen) {
1995   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
1996          "Unexpected element width.");
1997   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
1998 
1999   unsigned NumOfElem = 16 / Width;
2000   unsigned MaskVal[16]; //  Width is never greater than 16
2001   for (unsigned i = 0; i < NumOfElem; ++i) {
2002     MaskVal[0] = N->getMaskElt(i * Width);
2003     if ((StepLen == 1) && (MaskVal[0] % Width)) {
2004       return false;
2005     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2006       return false;
2007     }
2008 
2009     for (unsigned int j = 1; j < Width; ++j) {
2010       MaskVal[j] = N->getMaskElt(i * Width + j);
2011       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2012         return false;
2013       }
2014     }
2015   }
2016 
2017   return true;
2018 }
2019 
2020 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2021                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2022   if (!isNByteElemShuffleMask(N, 4, 1))
2023     return false;
2024 
2025   // Now we look at mask elements 0,4,8,12
2026   unsigned M0 = N->getMaskElt(0) / 4;
2027   unsigned M1 = N->getMaskElt(4) / 4;
2028   unsigned M2 = N->getMaskElt(8) / 4;
2029   unsigned M3 = N->getMaskElt(12) / 4;
2030   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2031   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2032 
2033   // Below, let H and L be arbitrary elements of the shuffle mask
2034   // where H is in the range [4,7] and L is in the range [0,3].
2035   // H, 1, 2, 3 or L, 5, 6, 7
2036   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2037       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2038     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2039     InsertAtByte = IsLE ? 12 : 0;
2040     Swap = M0 < 4;
2041     return true;
2042   }
2043   // 0, H, 2, 3 or 4, L, 6, 7
2044   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2045       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2046     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2047     InsertAtByte = IsLE ? 8 : 4;
2048     Swap = M1 < 4;
2049     return true;
2050   }
2051   // 0, 1, H, 3 or 4, 5, L, 7
2052   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2053       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2054     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2055     InsertAtByte = IsLE ? 4 : 8;
2056     Swap = M2 < 4;
2057     return true;
2058   }
2059   // 0, 1, 2, H or 4, 5, 6, L
2060   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2061       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2062     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2063     InsertAtByte = IsLE ? 0 : 12;
2064     Swap = M3 < 4;
2065     return true;
2066   }
2067 
2068   // If both vector operands for the shuffle are the same vector, the mask will
2069   // contain only elements from the first one and the second one will be undef.
2070   if (N->getOperand(1).isUndef()) {
2071     ShiftElts = 0;
2072     Swap = true;
2073     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2074     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2075       InsertAtByte = IsLE ? 12 : 0;
2076       return true;
2077     }
2078     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2079       InsertAtByte = IsLE ? 8 : 4;
2080       return true;
2081     }
2082     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2083       InsertAtByte = IsLE ? 4 : 8;
2084       return true;
2085     }
2086     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2087       InsertAtByte = IsLE ? 0 : 12;
2088       return true;
2089     }
2090   }
2091 
2092   return false;
2093 }
2094 
2095 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2096                                bool &Swap, bool IsLE) {
2097   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2098   // Ensure each byte index of the word is consecutive.
2099   if (!isNByteElemShuffleMask(N, 4, 1))
2100     return false;
2101 
2102   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2103   unsigned M0 = N->getMaskElt(0) / 4;
2104   unsigned M1 = N->getMaskElt(4) / 4;
2105   unsigned M2 = N->getMaskElt(8) / 4;
2106   unsigned M3 = N->getMaskElt(12) / 4;
2107 
2108   // If both vector operands for the shuffle are the same vector, the mask will
2109   // contain only elements from the first one and the second one will be undef.
2110   if (N->getOperand(1).isUndef()) {
2111     assert(M0 < 4 && "Indexing into an undef vector?");
2112     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2113       return false;
2114 
2115     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2116     Swap = false;
2117     return true;
2118   }
2119 
2120   // Ensure each word index of the ShuffleVector Mask is consecutive.
2121   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2122     return false;
2123 
2124   if (IsLE) {
2125     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2126       // Input vectors don't need to be swapped if the leading element
2127       // of the result is one of the 3 left elements of the second vector
2128       // (or if there is no shift to be done at all).
2129       Swap = false;
2130       ShiftElts = (8 - M0) % 8;
2131     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2132       // Input vectors need to be swapped if the leading element
2133       // of the result is one of the 3 left elements of the first vector
2134       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2135       Swap = true;
2136       ShiftElts = (4 - M0) % 4;
2137     }
2138 
2139     return true;
2140   } else {                                          // BE
2141     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2142       // Input vectors don't need to be swapped if the leading element
2143       // of the result is one of the 4 elements of the first vector.
2144       Swap = false;
2145       ShiftElts = M0;
2146     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2147       // Input vectors need to be swapped if the leading element
2148       // of the result is one of the 4 elements of the right vector.
2149       Swap = true;
2150       ShiftElts = M0 - 4;
2151     }
2152 
2153     return true;
2154   }
2155 }
2156 
2157 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2158   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2159 
2160   if (!isNByteElemShuffleMask(N, Width, -1))
2161     return false;
2162 
2163   for (int i = 0; i < 16; i += Width)
2164     if (N->getMaskElt(i) != i + Width - 1)
2165       return false;
2166 
2167   return true;
2168 }
2169 
2170 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2171   return isXXBRShuffleMaskHelper(N, 2);
2172 }
2173 
2174 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2175   return isXXBRShuffleMaskHelper(N, 4);
2176 }
2177 
2178 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2179   return isXXBRShuffleMaskHelper(N, 8);
2180 }
2181 
2182 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2183   return isXXBRShuffleMaskHelper(N, 16);
2184 }
2185 
2186 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2187 /// if the inputs to the instruction should be swapped and set \p DM to the
2188 /// value for the immediate.
2189 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2190 /// AND element 0 of the result comes from the first input (LE) or second input
2191 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2192 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2193 /// mask.
2194 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2195                                bool &Swap, bool IsLE) {
2196   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2197 
2198   // Ensure each byte index of the double word is consecutive.
2199   if (!isNByteElemShuffleMask(N, 8, 1))
2200     return false;
2201 
2202   unsigned M0 = N->getMaskElt(0) / 8;
2203   unsigned M1 = N->getMaskElt(8) / 8;
2204   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2205 
2206   // If both vector operands for the shuffle are the same vector, the mask will
2207   // contain only elements from the first one and the second one will be undef.
2208   if (N->getOperand(1).isUndef()) {
2209     if ((M0 | M1) < 2) {
2210       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2211       Swap = false;
2212       return true;
2213     } else
2214       return false;
2215   }
2216 
2217   if (IsLE) {
2218     if (M0 > 1 && M1 < 2) {
2219       Swap = false;
2220     } else if (M0 < 2 && M1 > 1) {
2221       M0 = (M0 + 2) % 4;
2222       M1 = (M1 + 2) % 4;
2223       Swap = true;
2224     } else
2225       return false;
2226 
2227     // Note: if control flow comes here that means Swap is already set above
2228     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2229     return true;
2230   } else { // BE
2231     if (M0 < 2 && M1 > 1) {
2232       Swap = false;
2233     } else if (M0 > 1 && M1 < 2) {
2234       M0 = (M0 + 2) % 4;
2235       M1 = (M1 + 2) % 4;
2236       Swap = true;
2237     } else
2238       return false;
2239 
2240     // Note: if control flow comes here that means Swap is already set above
2241     DM = (M0 << 1) + (M1 & 1);
2242     return true;
2243   }
2244 }
2245 
2246 
2247 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2248 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2249 /// elements are counted from the left of the vector register).
2250 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2251                                          SelectionDAG &DAG) {
2252   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2253   assert(isSplatShuffleMask(SVOp, EltSize));
2254   if (DAG.getDataLayout().isLittleEndian())
2255     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2256   else
2257     return SVOp->getMaskElt(0) / EltSize;
2258 }
2259 
2260 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2261 /// by using a vspltis[bhw] instruction of the specified element size, return
2262 /// the constant being splatted.  The ByteSize field indicates the number of
2263 /// bytes of each element [124] -> [bhw].
2264 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2265   SDValue OpVal(nullptr, 0);
2266 
2267   // If ByteSize of the splat is bigger than the element size of the
2268   // build_vector, then we have a case where we are checking for a splat where
2269   // multiple elements of the buildvector are folded together into a single
2270   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2271   unsigned EltSize = 16/N->getNumOperands();
2272   if (EltSize < ByteSize) {
2273     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2274     SDValue UniquedVals[4];
2275     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2276 
2277     // See if all of the elements in the buildvector agree across.
2278     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2279       if (N->getOperand(i).isUndef()) continue;
2280       // If the element isn't a constant, bail fully out.
2281       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2282 
2283       if (!UniquedVals[i&(Multiple-1)].getNode())
2284         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2285       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2286         return SDValue();  // no match.
2287     }
2288 
2289     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2290     // either constant or undef values that are identical for each chunk.  See
2291     // if these chunks can form into a larger vspltis*.
2292 
2293     // Check to see if all of the leading entries are either 0 or -1.  If
2294     // neither, then this won't fit into the immediate field.
2295     bool LeadingZero = true;
2296     bool LeadingOnes = true;
2297     for (unsigned i = 0; i != Multiple-1; ++i) {
2298       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2299 
2300       LeadingZero &= isNullConstant(UniquedVals[i]);
2301       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2302     }
2303     // Finally, check the least significant entry.
2304     if (LeadingZero) {
2305       if (!UniquedVals[Multiple-1].getNode())
2306         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2307       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2308       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2309         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2310     }
2311     if (LeadingOnes) {
2312       if (!UniquedVals[Multiple-1].getNode())
2313         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2314       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2315       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2316         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2317     }
2318 
2319     return SDValue();
2320   }
2321 
2322   // Check to see if this buildvec has a single non-undef value in its elements.
2323   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2324     if (N->getOperand(i).isUndef()) continue;
2325     if (!OpVal.getNode())
2326       OpVal = N->getOperand(i);
2327     else if (OpVal != N->getOperand(i))
2328       return SDValue();
2329   }
2330 
2331   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2332 
2333   unsigned ValSizeInBytes = EltSize;
2334   uint64_t Value = 0;
2335   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2336     Value = CN->getZExtValue();
2337   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2338     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2339     Value = FloatToBits(CN->getValueAPF().convertToFloat());
2340   }
2341 
2342   // If the splat value is larger than the element value, then we can never do
2343   // this splat.  The only case that we could fit the replicated bits into our
2344   // immediate field for would be zero, and we prefer to use vxor for it.
2345   if (ValSizeInBytes < ByteSize) return SDValue();
2346 
2347   // If the element value is larger than the splat value, check if it consists
2348   // of a repeated bit pattern of size ByteSize.
2349   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2350     return SDValue();
2351 
2352   // Properly sign extend the value.
2353   int MaskVal = SignExtend32(Value, ByteSize * 8);
2354 
2355   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2356   if (MaskVal == 0) return SDValue();
2357 
2358   // Finally, if this value fits in a 5 bit sext field, return it
2359   if (SignExtend32<5>(MaskVal) == MaskVal)
2360     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2361   return SDValue();
2362 }
2363 
2364 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2365 /// amount, otherwise return -1.
2366 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
2367   EVT VT = N->getValueType(0);
2368   if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
2369     return -1;
2370 
2371   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2372 
2373   // Find the first non-undef value in the shuffle mask.
2374   unsigned i;
2375   for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
2376     /*search*/;
2377 
2378   if (i == 4) return -1;  // all undef.
2379 
2380   // Otherwise, check to see if the rest of the elements are consecutively
2381   // numbered from this value.
2382   unsigned ShiftAmt = SVOp->getMaskElt(i);
2383   if (ShiftAmt < i) return -1;
2384   ShiftAmt -= i;
2385 
2386   // Check the rest of the elements to see if they are consecutive.
2387   for (++i; i != 4; ++i)
2388     if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2389       return -1;
2390 
2391   return ShiftAmt;
2392 }
2393 
2394 //===----------------------------------------------------------------------===//
2395 //  Addressing Mode Selection
2396 //===----------------------------------------------------------------------===//
2397 
2398 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2399 /// or 64-bit immediate, and if the value can be accurately represented as a
2400 /// sign extension from a 16-bit value.  If so, this returns true and the
2401 /// immediate.
2402 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2403   if (!isa<ConstantSDNode>(N))
2404     return false;
2405 
2406   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2407   if (N->getValueType(0) == MVT::i32)
2408     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2409   else
2410     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2411 }
2412 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2413   return isIntS16Immediate(Op.getNode(), Imm);
2414 }
2415 
2416 
2417 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2418 /// be represented as an indexed [r+r] operation.
2419 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2420                                                SDValue &Index,
2421                                                SelectionDAG &DAG) const {
2422   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2423       UI != E; ++UI) {
2424     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2425       if (Memop->getMemoryVT() == MVT::f64) {
2426           Base = N.getOperand(0);
2427           Index = N.getOperand(1);
2428           return true;
2429       }
2430     }
2431   }
2432   return false;
2433 }
2434 
2435 /// isIntS34Immediate - This method tests if value of node given can be
2436 /// accurately represented as a sign extension from a 34-bit value.  If so,
2437 /// this returns true and the immediate.
2438 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2439   if (!isa<ConstantSDNode>(N))
2440     return false;
2441 
2442   Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2443   return isInt<34>(Imm);
2444 }
2445 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2446   return isIntS34Immediate(Op.getNode(), Imm);
2447 }
2448 
2449 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2450 /// can be represented as an indexed [r+r] operation.  Returns false if it
2451 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2452 /// non-zero and N can be represented by a base register plus a signed 16-bit
2453 /// displacement, make a more precise judgement by checking (displacement % \p
2454 /// EncodingAlignment).
2455 bool PPCTargetLowering::SelectAddressRegReg(
2456     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2457     MaybeAlign EncodingAlignment) const {
2458   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2459   // a [pc+imm].
2460   if (SelectAddressPCRel(N, Base))
2461     return false;
2462 
2463   int16_t Imm = 0;
2464   if (N.getOpcode() == ISD::ADD) {
2465     // Is there any SPE load/store (f64), which can't handle 16bit offset?
2466     // SPE load/store can only handle 8-bit offsets.
2467     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2468         return true;
2469     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2470         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2471       return false; // r+i
2472     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2473       return false;    // r+i
2474 
2475     Base = N.getOperand(0);
2476     Index = N.getOperand(1);
2477     return true;
2478   } else if (N.getOpcode() == ISD::OR) {
2479     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2480         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2481       return false; // r+i can fold it if we can.
2482 
2483     // If this is an or of disjoint bitfields, we can codegen this as an add
2484     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2485     // disjoint.
2486     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2487 
2488     if (LHSKnown.Zero.getBoolValue()) {
2489       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2490       // If all of the bits are known zero on the LHS or RHS, the add won't
2491       // carry.
2492       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2493         Base = N.getOperand(0);
2494         Index = N.getOperand(1);
2495         return true;
2496       }
2497     }
2498   }
2499 
2500   return false;
2501 }
2502 
2503 // If we happen to be doing an i64 load or store into a stack slot that has
2504 // less than a 4-byte alignment, then the frame-index elimination may need to
2505 // use an indexed load or store instruction (because the offset may not be a
2506 // multiple of 4). The extra register needed to hold the offset comes from the
2507 // register scavenger, and it is possible that the scavenger will need to use
2508 // an emergency spill slot. As a result, we need to make sure that a spill slot
2509 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2510 // stack slot.
2511 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2512   // FIXME: This does not handle the LWA case.
2513   if (VT != MVT::i64)
2514     return;
2515 
2516   // NOTE: We'll exclude negative FIs here, which come from argument
2517   // lowering, because there are no known test cases triggering this problem
2518   // using packed structures (or similar). We can remove this exclusion if
2519   // we find such a test case. The reason why this is so test-case driven is
2520   // because this entire 'fixup' is only to prevent crashes (from the
2521   // register scavenger) on not-really-valid inputs. For example, if we have:
2522   //   %a = alloca i1
2523   //   %b = bitcast i1* %a to i64*
2524   //   store i64* a, i64 b
2525   // then the store should really be marked as 'align 1', but is not. If it
2526   // were marked as 'align 1' then the indexed form would have been
2527   // instruction-selected initially, and the problem this 'fixup' is preventing
2528   // won't happen regardless.
2529   if (FrameIdx < 0)
2530     return;
2531 
2532   MachineFunction &MF = DAG.getMachineFunction();
2533   MachineFrameInfo &MFI = MF.getFrameInfo();
2534 
2535   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2536     return;
2537 
2538   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2539   FuncInfo->setHasNonRISpills();
2540 }
2541 
2542 /// Returns true if the address N can be represented by a base register plus
2543 /// a signed 16-bit displacement [r+imm], and if it is not better
2544 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2545 /// displacements that are multiples of that value.
2546 bool PPCTargetLowering::SelectAddressRegImm(
2547     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2548     MaybeAlign EncodingAlignment) const {
2549   // FIXME dl should come from parent load or store, not from address
2550   SDLoc dl(N);
2551 
2552   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2553   // a [pc+imm].
2554   if (SelectAddressPCRel(N, Base))
2555     return false;
2556 
2557   // If this can be more profitably realized as r+r, fail.
2558   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2559     return false;
2560 
2561   if (N.getOpcode() == ISD::ADD) {
2562     int16_t imm = 0;
2563     if (isIntS16Immediate(N.getOperand(1), imm) &&
2564         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2565       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2566       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2567         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2568         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2569       } else {
2570         Base = N.getOperand(0);
2571       }
2572       return true; // [r+i]
2573     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2574       // Match LOAD (ADD (X, Lo(G))).
2575       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2576              && "Cannot handle constant offsets yet!");
2577       Disp = N.getOperand(1).getOperand(0);  // The global address.
2578       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2579              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2580              Disp.getOpcode() == ISD::TargetConstantPool ||
2581              Disp.getOpcode() == ISD::TargetJumpTable);
2582       Base = N.getOperand(0);
2583       return true;  // [&g+r]
2584     }
2585   } else if (N.getOpcode() == ISD::OR) {
2586     int16_t imm = 0;
2587     if (isIntS16Immediate(N.getOperand(1), imm) &&
2588         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2589       // If this is an or of disjoint bitfields, we can codegen this as an add
2590       // (for better address arithmetic) if the LHS and RHS of the OR are
2591       // provably disjoint.
2592       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2593 
2594       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2595         // If all of the bits are known zero on the LHS or RHS, the add won't
2596         // carry.
2597         if (FrameIndexSDNode *FI =
2598               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2599           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2600           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2601         } else {
2602           Base = N.getOperand(0);
2603         }
2604         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2605         return true;
2606       }
2607     }
2608   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2609     // Loading from a constant address.
2610 
2611     // If this address fits entirely in a 16-bit sext immediate field, codegen
2612     // this as "d, 0"
2613     int16_t Imm;
2614     if (isIntS16Immediate(CN, Imm) &&
2615         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2616       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2617       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2618                              CN->getValueType(0));
2619       return true;
2620     }
2621 
2622     // Handle 32-bit sext immediates with LIS + addr mode.
2623     if ((CN->getValueType(0) == MVT::i32 ||
2624          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2625         (!EncodingAlignment ||
2626          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2627       int Addr = (int)CN->getZExtValue();
2628 
2629       // Otherwise, break this down into an LIS + disp.
2630       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2631 
2632       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2633                                    MVT::i32);
2634       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2635       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2636       return true;
2637     }
2638   }
2639 
2640   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2641   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2642     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2643     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2644   } else
2645     Base = N;
2646   return true;      // [r+0]
2647 }
2648 
2649 /// Similar to the 16-bit case but for instructions that take a 34-bit
2650 /// displacement field (prefixed loads/stores).
2651 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2652                                               SDValue &Base,
2653                                               SelectionDAG &DAG) const {
2654   // Only on 64-bit targets.
2655   if (N.getValueType() != MVT::i64)
2656     return false;
2657 
2658   SDLoc dl(N);
2659   int64_t Imm = 0;
2660 
2661   if (N.getOpcode() == ISD::ADD) {
2662     if (!isIntS34Immediate(N.getOperand(1), Imm))
2663       return false;
2664     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2665     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2666       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2667     else
2668       Base = N.getOperand(0);
2669     return true;
2670   }
2671 
2672   if (N.getOpcode() == ISD::OR) {
2673     if (!isIntS34Immediate(N.getOperand(1), Imm))
2674       return false;
2675     // If this is an or of disjoint bitfields, we can codegen this as an add
2676     // (for better address arithmetic) if the LHS and RHS of the OR are
2677     // provably disjoint.
2678     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2679     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2680       return false;
2681     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2682       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2683     else
2684       Base = N.getOperand(0);
2685     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2686     return true;
2687   }
2688 
2689   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2690     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2691     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2692     return true;
2693   }
2694 
2695   return false;
2696 }
2697 
2698 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2699 /// represented as an indexed [r+r] operation.
2700 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2701                                                 SDValue &Index,
2702                                                 SelectionDAG &DAG) const {
2703   // Check to see if we can easily represent this as an [r+r] address.  This
2704   // will fail if it thinks that the address is more profitably represented as
2705   // reg+imm, e.g. where imm = 0.
2706   if (SelectAddressRegReg(N, Base, Index, DAG))
2707     return true;
2708 
2709   // If the address is the result of an add, we will utilize the fact that the
2710   // address calculation includes an implicit add.  However, we can reduce
2711   // register pressure if we do not materialize a constant just for use as the
2712   // index register.  We only get rid of the add if it is not an add of a
2713   // value and a 16-bit signed constant and both have a single use.
2714   int16_t imm = 0;
2715   if (N.getOpcode() == ISD::ADD &&
2716       (!isIntS16Immediate(N.getOperand(1), imm) ||
2717        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2718     Base = N.getOperand(0);
2719     Index = N.getOperand(1);
2720     return true;
2721   }
2722 
2723   // Otherwise, do it the hard way, using R0 as the base register.
2724   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2725                          N.getValueType());
2726   Index = N;
2727   return true;
2728 }
2729 
2730 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2731   Ty *PCRelCand = dyn_cast<Ty>(N);
2732   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2733 }
2734 
2735 /// Returns true if this address is a PC Relative address.
2736 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2737 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2738 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2739   // This is a materialize PC Relative node. Always select this as PC Relative.
2740   Base = N;
2741   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2742     return true;
2743   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2744       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2745       isValidPCRelNode<JumpTableSDNode>(N) ||
2746       isValidPCRelNode<BlockAddressSDNode>(N))
2747     return true;
2748   return false;
2749 }
2750 
2751 /// Returns true if we should use a direct load into vector instruction
2752 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2753 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2754 
2755   // If there are any other uses other than scalar to vector, then we should
2756   // keep it as a scalar load -> direct move pattern to prevent multiple
2757   // loads.
2758   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2759   if (!LD)
2760     return false;
2761 
2762   EVT MemVT = LD->getMemoryVT();
2763   if (!MemVT.isSimple())
2764     return false;
2765   switch(MemVT.getSimpleVT().SimpleTy) {
2766   case MVT::i64:
2767     break;
2768   case MVT::i32:
2769     if (!ST.hasP8Vector())
2770       return false;
2771     break;
2772   case MVT::i16:
2773   case MVT::i8:
2774     if (!ST.hasP9Vector())
2775       return false;
2776     break;
2777   default:
2778     return false;
2779   }
2780 
2781   SDValue LoadedVal(N, 0);
2782   if (!LoadedVal.hasOneUse())
2783     return false;
2784 
2785   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2786        UI != UE; ++UI)
2787     if (UI.getUse().get().getResNo() == 0 &&
2788         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2789         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2790       return false;
2791 
2792   return true;
2793 }
2794 
2795 /// getPreIndexedAddressParts - returns true by value, base pointer and
2796 /// offset pointer and addressing mode by reference if the node's address
2797 /// can be legally represented as pre-indexed load / store address.
2798 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2799                                                   SDValue &Offset,
2800                                                   ISD::MemIndexedMode &AM,
2801                                                   SelectionDAG &DAG) const {
2802   if (DisablePPCPreinc) return false;
2803 
2804   bool isLoad = true;
2805   SDValue Ptr;
2806   EVT VT;
2807   unsigned Alignment;
2808   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2809     Ptr = LD->getBasePtr();
2810     VT = LD->getMemoryVT();
2811     Alignment = LD->getAlignment();
2812   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2813     Ptr = ST->getBasePtr();
2814     VT  = ST->getMemoryVT();
2815     Alignment = ST->getAlignment();
2816     isLoad = false;
2817   } else
2818     return false;
2819 
2820   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2821   // instructions because we can fold these into a more efficient instruction
2822   // instead, (such as LXSD).
2823   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2824     return false;
2825   }
2826 
2827   // PowerPC doesn't have preinc load/store instructions for vectors
2828   if (VT.isVector())
2829     return false;
2830 
2831   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2832     // Common code will reject creating a pre-inc form if the base pointer
2833     // is a frame index, or if N is a store and the base pointer is either
2834     // the same as or a predecessor of the value being stored.  Check for
2835     // those situations here, and try with swapped Base/Offset instead.
2836     bool Swap = false;
2837 
2838     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2839       Swap = true;
2840     else if (!isLoad) {
2841       SDValue Val = cast<StoreSDNode>(N)->getValue();
2842       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2843         Swap = true;
2844     }
2845 
2846     if (Swap)
2847       std::swap(Base, Offset);
2848 
2849     AM = ISD::PRE_INC;
2850     return true;
2851   }
2852 
2853   // LDU/STU can only handle immediates that are a multiple of 4.
2854   if (VT != MVT::i64) {
2855     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2856       return false;
2857   } else {
2858     // LDU/STU need an address with at least 4-byte alignment.
2859     if (Alignment < 4)
2860       return false;
2861 
2862     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2863       return false;
2864   }
2865 
2866   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2867     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2868     // sext i32 to i64 when addr mode is r+i.
2869     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2870         LD->getExtensionType() == ISD::SEXTLOAD &&
2871         isa<ConstantSDNode>(Offset))
2872       return false;
2873   }
2874 
2875   AM = ISD::PRE_INC;
2876   return true;
2877 }
2878 
2879 //===----------------------------------------------------------------------===//
2880 //  LowerOperation implementation
2881 //===----------------------------------------------------------------------===//
2882 
2883 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2884 /// and LoOpFlags to the target MO flags.
2885 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2886                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2887                                const GlobalValue *GV = nullptr) {
2888   HiOpFlags = PPCII::MO_HA;
2889   LoOpFlags = PPCII::MO_LO;
2890 
2891   // Don't use the pic base if not in PIC relocation model.
2892   if (IsPIC) {
2893     HiOpFlags |= PPCII::MO_PIC_FLAG;
2894     LoOpFlags |= PPCII::MO_PIC_FLAG;
2895   }
2896 }
2897 
2898 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2899                              SelectionDAG &DAG) {
2900   SDLoc DL(HiPart);
2901   EVT PtrVT = HiPart.getValueType();
2902   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2903 
2904   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2905   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2906 
2907   // With PIC, the first instruction is actually "GR+hi(&G)".
2908   if (isPIC)
2909     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2910                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2911 
2912   // Generate non-pic code that has direct accesses to the constant pool.
2913   // The address of the global is just (hi(&g)+lo(&g)).
2914   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2915 }
2916 
2917 static void setUsesTOCBasePtr(MachineFunction &MF) {
2918   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2919   FuncInfo->setUsesTOCBasePtr();
2920 }
2921 
2922 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2923   setUsesTOCBasePtr(DAG.getMachineFunction());
2924 }
2925 
2926 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2927                                        SDValue GA) const {
2928   const bool Is64Bit = Subtarget.isPPC64();
2929   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2930   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2931                         : Subtarget.isAIXABI()
2932                               ? DAG.getRegister(PPC::R2, VT)
2933                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2934   SDValue Ops[] = { GA, Reg };
2935   return DAG.getMemIntrinsicNode(
2936       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2937       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2938       MachineMemOperand::MOLoad);
2939 }
2940 
2941 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2942                                              SelectionDAG &DAG) const {
2943   EVT PtrVT = Op.getValueType();
2944   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2945   const Constant *C = CP->getConstVal();
2946 
2947   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2948   // The actual address of the GlobalValue is stored in the TOC.
2949   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2950     if (Subtarget.isUsingPCRelativeCalls()) {
2951       SDLoc DL(CP);
2952       EVT Ty = getPointerTy(DAG.getDataLayout());
2953       SDValue ConstPool = DAG.getTargetConstantPool(
2954           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2955       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2956     }
2957     setUsesTOCBasePtr(DAG);
2958     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2959     return getTOCEntry(DAG, SDLoc(CP), GA);
2960   }
2961 
2962   unsigned MOHiFlag, MOLoFlag;
2963   bool IsPIC = isPositionIndependent();
2964   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2965 
2966   if (IsPIC && Subtarget.isSVR4ABI()) {
2967     SDValue GA =
2968         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2969     return getTOCEntry(DAG, SDLoc(CP), GA);
2970   }
2971 
2972   SDValue CPIHi =
2973       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2974   SDValue CPILo =
2975       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2976   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2977 }
2978 
2979 // For 64-bit PowerPC, prefer the more compact relative encodings.
2980 // This trades 32 bits per jump table entry for one or two instructions
2981 // on the jump site.
2982 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2983   if (isJumpTableRelative())
2984     return MachineJumpTableInfo::EK_LabelDifference32;
2985 
2986   return TargetLowering::getJumpTableEncoding();
2987 }
2988 
2989 bool PPCTargetLowering::isJumpTableRelative() const {
2990   if (UseAbsoluteJumpTables)
2991     return false;
2992   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
2993     return true;
2994   return TargetLowering::isJumpTableRelative();
2995 }
2996 
2997 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
2998                                                     SelectionDAG &DAG) const {
2999   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3000     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3001 
3002   switch (getTargetMachine().getCodeModel()) {
3003   case CodeModel::Small:
3004   case CodeModel::Medium:
3005     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3006   default:
3007     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3008                        getPointerTy(DAG.getDataLayout()));
3009   }
3010 }
3011 
3012 const MCExpr *
3013 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3014                                                 unsigned JTI,
3015                                                 MCContext &Ctx) const {
3016   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3017     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3018 
3019   switch (getTargetMachine().getCodeModel()) {
3020   case CodeModel::Small:
3021   case CodeModel::Medium:
3022     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3023   default:
3024     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3025   }
3026 }
3027 
3028 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3029   EVT PtrVT = Op.getValueType();
3030   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3031 
3032   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3033   if (Subtarget.isUsingPCRelativeCalls()) {
3034     SDLoc DL(JT);
3035     EVT Ty = getPointerTy(DAG.getDataLayout());
3036     SDValue GA =
3037         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3038     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3039     return MatAddr;
3040   }
3041 
3042   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3043   // The actual address of the GlobalValue is stored in the TOC.
3044   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3045     setUsesTOCBasePtr(DAG);
3046     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3047     return getTOCEntry(DAG, SDLoc(JT), GA);
3048   }
3049 
3050   unsigned MOHiFlag, MOLoFlag;
3051   bool IsPIC = isPositionIndependent();
3052   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3053 
3054   if (IsPIC && Subtarget.isSVR4ABI()) {
3055     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3056                                         PPCII::MO_PIC_FLAG);
3057     return getTOCEntry(DAG, SDLoc(GA), GA);
3058   }
3059 
3060   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3061   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3062   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3063 }
3064 
3065 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3066                                              SelectionDAG &DAG) const {
3067   EVT PtrVT = Op.getValueType();
3068   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3069   const BlockAddress *BA = BASDN->getBlockAddress();
3070 
3071   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3072   if (Subtarget.isUsingPCRelativeCalls()) {
3073     SDLoc DL(BASDN);
3074     EVT Ty = getPointerTy(DAG.getDataLayout());
3075     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3076                                            PPCII::MO_PCREL_FLAG);
3077     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3078     return MatAddr;
3079   }
3080 
3081   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3082   // The actual BlockAddress is stored in the TOC.
3083   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3084     setUsesTOCBasePtr(DAG);
3085     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3086     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3087   }
3088 
3089   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3090   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3091     return getTOCEntry(
3092         DAG, SDLoc(BASDN),
3093         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3094 
3095   unsigned MOHiFlag, MOLoFlag;
3096   bool IsPIC = isPositionIndependent();
3097   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3098   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3099   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3100   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3101 }
3102 
3103 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3104                                               SelectionDAG &DAG) const {
3105   // FIXME: TLS addresses currently use medium model code sequences,
3106   // which is the most useful form.  Eventually support for small and
3107   // large models could be added if users need it, at the cost of
3108   // additional complexity.
3109   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3110   if (DAG.getTarget().useEmulatedTLS())
3111     return LowerToTLSEmulatedModel(GA, DAG);
3112 
3113   SDLoc dl(GA);
3114   const GlobalValue *GV = GA->getGlobal();
3115   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3116   bool is64bit = Subtarget.isPPC64();
3117   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3118   PICLevel::Level picLevel = M->getPICLevel();
3119 
3120   const TargetMachine &TM = getTargetMachine();
3121   TLSModel::Model Model = TM.getTLSModel(GV);
3122 
3123   if (Model == TLSModel::LocalExec) {
3124     if (Subtarget.isUsingPCRelativeCalls()) {
3125       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3126       SDValue TGA = DAG.getTargetGlobalAddress(
3127           GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3128       SDValue MatAddr =
3129           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3130       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3131     }
3132 
3133     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3134                                                PPCII::MO_TPREL_HA);
3135     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3136                                                PPCII::MO_TPREL_LO);
3137     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3138                              : DAG.getRegister(PPC::R2, MVT::i32);
3139 
3140     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3141     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3142   }
3143 
3144   if (Model == TLSModel::InitialExec) {
3145     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3146     SDValue TGA = DAG.getTargetGlobalAddress(
3147         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3148     SDValue TGATLS = DAG.getTargetGlobalAddress(
3149         GV, dl, PtrVT, 0,
3150         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3151     SDValue TPOffset;
3152     if (IsPCRel) {
3153       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3154       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3155                              MachinePointerInfo());
3156     } else {
3157       SDValue GOTPtr;
3158       if (is64bit) {
3159         setUsesTOCBasePtr(DAG);
3160         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3161         GOTPtr =
3162             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3163       } else {
3164         if (!TM.isPositionIndependent())
3165           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3166         else if (picLevel == PICLevel::SmallPIC)
3167           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3168         else
3169           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3170       }
3171       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3172     }
3173     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3174   }
3175 
3176   if (Model == TLSModel::GeneralDynamic) {
3177     if (Subtarget.isUsingPCRelativeCalls()) {
3178       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3179                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3180       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3181     }
3182 
3183     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3184     SDValue GOTPtr;
3185     if (is64bit) {
3186       setUsesTOCBasePtr(DAG);
3187       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3188       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3189                                    GOTReg, TGA);
3190     } else {
3191       if (picLevel == PICLevel::SmallPIC)
3192         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3193       else
3194         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3195     }
3196     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3197                        GOTPtr, TGA, TGA);
3198   }
3199 
3200   if (Model == TLSModel::LocalDynamic) {
3201     if (Subtarget.isUsingPCRelativeCalls()) {
3202       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3203                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3204       SDValue MatPCRel =
3205           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3206       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3207     }
3208 
3209     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3210     SDValue GOTPtr;
3211     if (is64bit) {
3212       setUsesTOCBasePtr(DAG);
3213       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3214       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3215                            GOTReg, TGA);
3216     } else {
3217       if (picLevel == PICLevel::SmallPIC)
3218         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3219       else
3220         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3221     }
3222     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3223                                   PtrVT, GOTPtr, TGA, TGA);
3224     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3225                                       PtrVT, TLSAddr, TGA);
3226     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3227   }
3228 
3229   llvm_unreachable("Unknown TLS model!");
3230 }
3231 
3232 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3233                                               SelectionDAG &DAG) const {
3234   EVT PtrVT = Op.getValueType();
3235   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3236   SDLoc DL(GSDN);
3237   const GlobalValue *GV = GSDN->getGlobal();
3238 
3239   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3240   // The actual address of the GlobalValue is stored in the TOC.
3241   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3242     if (Subtarget.isUsingPCRelativeCalls()) {
3243       EVT Ty = getPointerTy(DAG.getDataLayout());
3244       if (isAccessedAsGotIndirect(Op)) {
3245         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3246                                                 PPCII::MO_PCREL_FLAG |
3247                                                     PPCII::MO_GOT_FLAG);
3248         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3249         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3250                                    MachinePointerInfo());
3251         return Load;
3252       } else {
3253         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3254                                                 PPCII::MO_PCREL_FLAG);
3255         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3256       }
3257     }
3258     setUsesTOCBasePtr(DAG);
3259     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3260     return getTOCEntry(DAG, DL, GA);
3261   }
3262 
3263   unsigned MOHiFlag, MOLoFlag;
3264   bool IsPIC = isPositionIndependent();
3265   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3266 
3267   if (IsPIC && Subtarget.isSVR4ABI()) {
3268     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3269                                             GSDN->getOffset(),
3270                                             PPCII::MO_PIC_FLAG);
3271     return getTOCEntry(DAG, DL, GA);
3272   }
3273 
3274   SDValue GAHi =
3275     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3276   SDValue GALo =
3277     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3278 
3279   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3280 }
3281 
3282 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3283   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3284   SDLoc dl(Op);
3285 
3286   if (Op.getValueType() == MVT::v2i64) {
3287     // When the operands themselves are v2i64 values, we need to do something
3288     // special because VSX has no underlying comparison operations for these.
3289     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3290       // Equality can be handled by casting to the legal type for Altivec
3291       // comparisons, everything else needs to be expanded.
3292       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3293         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3294                  DAG.getSetCC(dl, MVT::v4i32,
3295                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3296                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3297                    CC));
3298       }
3299 
3300       return SDValue();
3301     }
3302 
3303     // We handle most of these in the usual way.
3304     return Op;
3305   }
3306 
3307   // If we're comparing for equality to zero, expose the fact that this is
3308   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3309   // fold the new nodes.
3310   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3311     return V;
3312 
3313   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3314     // Leave comparisons against 0 and -1 alone for now, since they're usually
3315     // optimized.  FIXME: revisit this when we can custom lower all setcc
3316     // optimizations.
3317     if (C->isAllOnesValue() || C->isNullValue())
3318       return SDValue();
3319   }
3320 
3321   // If we have an integer seteq/setne, turn it into a compare against zero
3322   // by xor'ing the rhs with the lhs, which is faster than setting a
3323   // condition register, reading it back out, and masking the correct bit.  The
3324   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3325   // the result to other bit-twiddling opportunities.
3326   EVT LHSVT = Op.getOperand(0).getValueType();
3327   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3328     EVT VT = Op.getValueType();
3329     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3330                                 Op.getOperand(1));
3331     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3332   }
3333   return SDValue();
3334 }
3335 
3336 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3337   SDNode *Node = Op.getNode();
3338   EVT VT = Node->getValueType(0);
3339   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3340   SDValue InChain = Node->getOperand(0);
3341   SDValue VAListPtr = Node->getOperand(1);
3342   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3343   SDLoc dl(Node);
3344 
3345   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3346 
3347   // gpr_index
3348   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3349                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3350   InChain = GprIndex.getValue(1);
3351 
3352   if (VT == MVT::i64) {
3353     // Check if GprIndex is even
3354     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3355                                  DAG.getConstant(1, dl, MVT::i32));
3356     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3357                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3358     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3359                                           DAG.getConstant(1, dl, MVT::i32));
3360     // Align GprIndex to be even if it isn't
3361     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3362                            GprIndex);
3363   }
3364 
3365   // fpr index is 1 byte after gpr
3366   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3367                                DAG.getConstant(1, dl, MVT::i32));
3368 
3369   // fpr
3370   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3371                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3372   InChain = FprIndex.getValue(1);
3373 
3374   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3375                                        DAG.getConstant(8, dl, MVT::i32));
3376 
3377   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3378                                         DAG.getConstant(4, dl, MVT::i32));
3379 
3380   // areas
3381   SDValue OverflowArea =
3382       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3383   InChain = OverflowArea.getValue(1);
3384 
3385   SDValue RegSaveArea =
3386       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3387   InChain = RegSaveArea.getValue(1);
3388 
3389   // select overflow_area if index > 8
3390   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3391                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3392 
3393   // adjustment constant gpr_index * 4/8
3394   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3395                                     VT.isInteger() ? GprIndex : FprIndex,
3396                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3397                                                     MVT::i32));
3398 
3399   // OurReg = RegSaveArea + RegConstant
3400   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3401                                RegConstant);
3402 
3403   // Floating types are 32 bytes into RegSaveArea
3404   if (VT.isFloatingPoint())
3405     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3406                          DAG.getConstant(32, dl, MVT::i32));
3407 
3408   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3409   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3410                                    VT.isInteger() ? GprIndex : FprIndex,
3411                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3412                                                    MVT::i32));
3413 
3414   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3415                               VT.isInteger() ? VAListPtr : FprPtr,
3416                               MachinePointerInfo(SV), MVT::i8);
3417 
3418   // determine if we should load from reg_save_area or overflow_area
3419   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3420 
3421   // increase overflow_area by 4/8 if gpr/fpr > 8
3422   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3423                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3424                                           dl, MVT::i32));
3425 
3426   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3427                              OverflowAreaPlusN);
3428 
3429   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3430                               MachinePointerInfo(), MVT::i32);
3431 
3432   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3433 }
3434 
3435 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3436   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3437 
3438   // We have to copy the entire va_list struct:
3439   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3440   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3441                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3442                        false, true, false, MachinePointerInfo(),
3443                        MachinePointerInfo());
3444 }
3445 
3446 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3447                                                   SelectionDAG &DAG) const {
3448   if (Subtarget.isAIXABI())
3449     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3450 
3451   return Op.getOperand(0);
3452 }
3453 
3454 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3455                                                 SelectionDAG &DAG) const {
3456   if (Subtarget.isAIXABI())
3457     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3458 
3459   SDValue Chain = Op.getOperand(0);
3460   SDValue Trmp = Op.getOperand(1); // trampoline
3461   SDValue FPtr = Op.getOperand(2); // nested function
3462   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3463   SDLoc dl(Op);
3464 
3465   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3466   bool isPPC64 = (PtrVT == MVT::i64);
3467   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3468 
3469   TargetLowering::ArgListTy Args;
3470   TargetLowering::ArgListEntry Entry;
3471 
3472   Entry.Ty = IntPtrTy;
3473   Entry.Node = Trmp; Args.push_back(Entry);
3474 
3475   // TrampSize == (isPPC64 ? 48 : 40);
3476   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3477                                isPPC64 ? MVT::i64 : MVT::i32);
3478   Args.push_back(Entry);
3479 
3480   Entry.Node = FPtr; Args.push_back(Entry);
3481   Entry.Node = Nest; Args.push_back(Entry);
3482 
3483   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3484   TargetLowering::CallLoweringInfo CLI(DAG);
3485   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3486       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3487       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3488 
3489   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3490   return CallResult.second;
3491 }
3492 
3493 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3494   MachineFunction &MF = DAG.getMachineFunction();
3495   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3496   EVT PtrVT = getPointerTy(MF.getDataLayout());
3497 
3498   SDLoc dl(Op);
3499 
3500   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3501     // vastart just stores the address of the VarArgsFrameIndex slot into the
3502     // memory location argument.
3503     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3504     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3505     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3506                         MachinePointerInfo(SV));
3507   }
3508 
3509   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3510   // We suppose the given va_list is already allocated.
3511   //
3512   // typedef struct {
3513   //  char gpr;     /* index into the array of 8 GPRs
3514   //                 * stored in the register save area
3515   //                 * gpr=0 corresponds to r3,
3516   //                 * gpr=1 to r4, etc.
3517   //                 */
3518   //  char fpr;     /* index into the array of 8 FPRs
3519   //                 * stored in the register save area
3520   //                 * fpr=0 corresponds to f1,
3521   //                 * fpr=1 to f2, etc.
3522   //                 */
3523   //  char *overflow_arg_area;
3524   //                /* location on stack that holds
3525   //                 * the next overflow argument
3526   //                 */
3527   //  char *reg_save_area;
3528   //               /* where r3:r10 and f1:f8 (if saved)
3529   //                * are stored
3530   //                */
3531   // } va_list[1];
3532 
3533   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3534   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3535   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3536                                             PtrVT);
3537   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3538                                  PtrVT);
3539 
3540   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3541   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3542 
3543   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3544   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3545 
3546   uint64_t FPROffset = 1;
3547   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3548 
3549   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3550 
3551   // Store first byte : number of int regs
3552   SDValue firstStore =
3553       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3554                         MachinePointerInfo(SV), MVT::i8);
3555   uint64_t nextOffset = FPROffset;
3556   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3557                                   ConstFPROffset);
3558 
3559   // Store second byte : number of float regs
3560   SDValue secondStore =
3561       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3562                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3563   nextOffset += StackOffset;
3564   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3565 
3566   // Store second word : arguments given on stack
3567   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3568                                     MachinePointerInfo(SV, nextOffset));
3569   nextOffset += FrameOffset;
3570   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3571 
3572   // Store third word : arguments given in registers
3573   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3574                       MachinePointerInfo(SV, nextOffset));
3575 }
3576 
3577 /// FPR - The set of FP registers that should be allocated for arguments
3578 /// on Darwin and AIX.
3579 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3580                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3581                                 PPC::F11, PPC::F12, PPC::F13};
3582 
3583 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3584 /// the stack.
3585 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3586                                        unsigned PtrByteSize) {
3587   unsigned ArgSize = ArgVT.getStoreSize();
3588   if (Flags.isByVal())
3589     ArgSize = Flags.getByValSize();
3590 
3591   // Round up to multiples of the pointer size, except for array members,
3592   // which are always packed.
3593   if (!Flags.isInConsecutiveRegs())
3594     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3595 
3596   return ArgSize;
3597 }
3598 
3599 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3600 /// on the stack.
3601 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3602                                          ISD::ArgFlagsTy Flags,
3603                                          unsigned PtrByteSize) {
3604   Align Alignment(PtrByteSize);
3605 
3606   // Altivec parameters are padded to a 16 byte boundary.
3607   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3608       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3609       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3610       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3611     Alignment = Align(16);
3612 
3613   // ByVal parameters are aligned as requested.
3614   if (Flags.isByVal()) {
3615     auto BVAlign = Flags.getNonZeroByValAlign();
3616     if (BVAlign > PtrByteSize) {
3617       if (BVAlign.value() % PtrByteSize != 0)
3618         llvm_unreachable(
3619             "ByVal alignment is not a multiple of the pointer size");
3620 
3621       Alignment = BVAlign;
3622     }
3623   }
3624 
3625   // Array members are always packed to their original alignment.
3626   if (Flags.isInConsecutiveRegs()) {
3627     // If the array member was split into multiple registers, the first
3628     // needs to be aligned to the size of the full type.  (Except for
3629     // ppcf128, which is only aligned as its f64 components.)
3630     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3631       Alignment = Align(OrigVT.getStoreSize());
3632     else
3633       Alignment = Align(ArgVT.getStoreSize());
3634   }
3635 
3636   return Alignment;
3637 }
3638 
3639 /// CalculateStackSlotUsed - Return whether this argument will use its
3640 /// stack slot (instead of being passed in registers).  ArgOffset,
3641 /// AvailableFPRs, and AvailableVRs must hold the current argument
3642 /// position, and will be updated to account for this argument.
3643 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3644                                    unsigned PtrByteSize, unsigned LinkageSize,
3645                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3646                                    unsigned &AvailableFPRs,
3647                                    unsigned &AvailableVRs) {
3648   bool UseMemory = false;
3649 
3650   // Respect alignment of argument on the stack.
3651   Align Alignment =
3652       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3653   ArgOffset = alignTo(ArgOffset, Alignment);
3654   // If there's no space left in the argument save area, we must
3655   // use memory (this check also catches zero-sized arguments).
3656   if (ArgOffset >= LinkageSize + ParamAreaSize)
3657     UseMemory = true;
3658 
3659   // Allocate argument on the stack.
3660   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3661   if (Flags.isInConsecutiveRegsLast())
3662     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3663   // If we overran the argument save area, we must use memory
3664   // (this check catches arguments passed partially in memory)
3665   if (ArgOffset > LinkageSize + ParamAreaSize)
3666     UseMemory = true;
3667 
3668   // However, if the argument is actually passed in an FPR or a VR,
3669   // we don't use memory after all.
3670   if (!Flags.isByVal()) {
3671     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3672       if (AvailableFPRs > 0) {
3673         --AvailableFPRs;
3674         return false;
3675       }
3676     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3677         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3678         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3679         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3680       if (AvailableVRs > 0) {
3681         --AvailableVRs;
3682         return false;
3683       }
3684   }
3685 
3686   return UseMemory;
3687 }
3688 
3689 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3690 /// ensure minimum alignment required for target.
3691 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3692                                      unsigned NumBytes) {
3693   return alignTo(NumBytes, Lowering->getStackAlign());
3694 }
3695 
3696 SDValue PPCTargetLowering::LowerFormalArguments(
3697     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3698     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3699     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3700   if (Subtarget.isAIXABI())
3701     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3702                                     InVals);
3703   if (Subtarget.is64BitELFABI())
3704     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3705                                        InVals);
3706   if (Subtarget.is32BitELFABI())
3707     return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3708                                        InVals);
3709 
3710   return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
3711                                      InVals);
3712 }
3713 
3714 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3715     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3716     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3717     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3718 
3719   // 32-bit SVR4 ABI Stack Frame Layout:
3720   //              +-----------------------------------+
3721   //        +-->  |            Back chain             |
3722   //        |     +-----------------------------------+
3723   //        |     | Floating-point register save area |
3724   //        |     +-----------------------------------+
3725   //        |     |    General register save area     |
3726   //        |     +-----------------------------------+
3727   //        |     |          CR save word             |
3728   //        |     +-----------------------------------+
3729   //        |     |         VRSAVE save word          |
3730   //        |     +-----------------------------------+
3731   //        |     |         Alignment padding         |
3732   //        |     +-----------------------------------+
3733   //        |     |     Vector register save area     |
3734   //        |     +-----------------------------------+
3735   //        |     |       Local variable space        |
3736   //        |     +-----------------------------------+
3737   //        |     |        Parameter list area        |
3738   //        |     +-----------------------------------+
3739   //        |     |           LR save word            |
3740   //        |     +-----------------------------------+
3741   // SP-->  +---  |            Back chain             |
3742   //              +-----------------------------------+
3743   //
3744   // Specifications:
3745   //   System V Application Binary Interface PowerPC Processor Supplement
3746   //   AltiVec Technology Programming Interface Manual
3747 
3748   MachineFunction &MF = DAG.getMachineFunction();
3749   MachineFrameInfo &MFI = MF.getFrameInfo();
3750   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3751 
3752   EVT PtrVT = getPointerTy(MF.getDataLayout());
3753   // Potential tail calls could cause overwriting of argument stack slots.
3754   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3755                        (CallConv == CallingConv::Fast));
3756   const Align PtrAlign(4);
3757 
3758   // Assign locations to all of the incoming arguments.
3759   SmallVector<CCValAssign, 16> ArgLocs;
3760   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3761                  *DAG.getContext());
3762 
3763   // Reserve space for the linkage area on the stack.
3764   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3765   CCInfo.AllocateStack(LinkageSize, PtrAlign);
3766   if (useSoftFloat())
3767     CCInfo.PreAnalyzeFormalArguments(Ins);
3768 
3769   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3770   CCInfo.clearWasPPCF128();
3771 
3772   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3773     CCValAssign &VA = ArgLocs[i];
3774 
3775     // Arguments stored in registers.
3776     if (VA.isRegLoc()) {
3777       const TargetRegisterClass *RC;
3778       EVT ValVT = VA.getValVT();
3779 
3780       switch (ValVT.getSimpleVT().SimpleTy) {
3781         default:
3782           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3783         case MVT::i1:
3784         case MVT::i32:
3785           RC = &PPC::GPRCRegClass;
3786           break;
3787         case MVT::f32:
3788           if (Subtarget.hasP8Vector())
3789             RC = &PPC::VSSRCRegClass;
3790           else if (Subtarget.hasSPE())
3791             RC = &PPC::GPRCRegClass;
3792           else
3793             RC = &PPC::F4RCRegClass;
3794           break;
3795         case MVT::f64:
3796           if (Subtarget.hasVSX())
3797             RC = &PPC::VSFRCRegClass;
3798           else if (Subtarget.hasSPE())
3799             // SPE passes doubles in GPR pairs.
3800             RC = &PPC::GPRCRegClass;
3801           else
3802             RC = &PPC::F8RCRegClass;
3803           break;
3804         case MVT::v16i8:
3805         case MVT::v8i16:
3806         case MVT::v4i32:
3807           RC = &PPC::VRRCRegClass;
3808           break;
3809         case MVT::v4f32:
3810           RC = &PPC::VRRCRegClass;
3811           break;
3812         case MVT::v2f64:
3813         case MVT::v2i64:
3814           RC = &PPC::VRRCRegClass;
3815           break;
3816       }
3817 
3818       SDValue ArgValue;
3819       // Transform the arguments stored in physical registers into
3820       // virtual ones.
3821       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3822         assert(i + 1 < e && "No second half of double precision argument");
3823         unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3824         unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3825         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3826         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3827         if (!Subtarget.isLittleEndian())
3828           std::swap (ArgValueLo, ArgValueHi);
3829         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3830                                ArgValueHi);
3831       } else {
3832         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3833         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3834                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
3835         if (ValVT == MVT::i1)
3836           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3837       }
3838 
3839       InVals.push_back(ArgValue);
3840     } else {
3841       // Argument stored in memory.
3842       assert(VA.isMemLoc());
3843 
3844       // Get the extended size of the argument type in stack
3845       unsigned ArgSize = VA.getLocVT().getStoreSize();
3846       // Get the actual size of the argument type
3847       unsigned ObjSize = VA.getValVT().getStoreSize();
3848       unsigned ArgOffset = VA.getLocMemOffset();
3849       // Stack objects in PPC32 are right justified.
3850       ArgOffset += ArgSize - ObjSize;
3851       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3852 
3853       // Create load nodes to retrieve arguments from the stack.
3854       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3855       InVals.push_back(
3856           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3857     }
3858   }
3859 
3860   // Assign locations to all of the incoming aggregate by value arguments.
3861   // Aggregates passed by value are stored in the local variable space of the
3862   // caller's stack frame, right above the parameter list area.
3863   SmallVector<CCValAssign, 16> ByValArgLocs;
3864   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3865                       ByValArgLocs, *DAG.getContext());
3866 
3867   // Reserve stack space for the allocations in CCInfo.
3868   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3869 
3870   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3871 
3872   // Area that is at least reserved in the caller of this function.
3873   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3874   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3875 
3876   // Set the size that is at least reserved in caller of this function.  Tail
3877   // call optimized function's reserved stack space needs to be aligned so that
3878   // taking the difference between two stack areas will result in an aligned
3879   // stack.
3880   MinReservedArea =
3881       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3882   FuncInfo->setMinReservedArea(MinReservedArea);
3883 
3884   SmallVector<SDValue, 8> MemOps;
3885 
3886   // If the function takes variable number of arguments, make a frame index for
3887   // the start of the first vararg value... for expansion of llvm.va_start.
3888   if (isVarArg) {
3889     static const MCPhysReg GPArgRegs[] = {
3890       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3891       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3892     };
3893     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3894 
3895     static const MCPhysReg FPArgRegs[] = {
3896       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3897       PPC::F8
3898     };
3899     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3900 
3901     if (useSoftFloat() || hasSPE())
3902        NumFPArgRegs = 0;
3903 
3904     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3905     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3906 
3907     // Make room for NumGPArgRegs and NumFPArgRegs.
3908     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3909                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3910 
3911     FuncInfo->setVarArgsStackOffset(
3912       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3913                             CCInfo.getNextStackOffset(), true));
3914 
3915     FuncInfo->setVarArgsFrameIndex(
3916         MFI.CreateStackObject(Depth, Align(8), false));
3917     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3918 
3919     // The fixed integer arguments of a variadic function are stored to the
3920     // VarArgsFrameIndex on the stack so that they may be loaded by
3921     // dereferencing the result of va_next.
3922     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3923       // Get an existing live-in vreg, or add a new one.
3924       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3925       if (!VReg)
3926         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3927 
3928       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3929       SDValue Store =
3930           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3931       MemOps.push_back(Store);
3932       // Increment the address by four for the next argument to store
3933       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3934       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3935     }
3936 
3937     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3938     // is set.
3939     // The double arguments are stored to the VarArgsFrameIndex
3940     // on the stack.
3941     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
3942       // Get an existing live-in vreg, or add a new one.
3943       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
3944       if (!VReg)
3945         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
3946 
3947       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
3948       SDValue Store =
3949           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3950       MemOps.push_back(Store);
3951       // Increment the address by eight for the next argument to store
3952       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
3953                                          PtrVT);
3954       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3955     }
3956   }
3957 
3958   if (!MemOps.empty())
3959     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3960 
3961   return Chain;
3962 }
3963 
3964 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3965 // value to MVT::i64 and then truncate to the correct register size.
3966 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
3967                                              EVT ObjectVT, SelectionDAG &DAG,
3968                                              SDValue ArgVal,
3969                                              const SDLoc &dl) const {
3970   if (Flags.isSExt())
3971     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
3972                          DAG.getValueType(ObjectVT));
3973   else if (Flags.isZExt())
3974     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3975                          DAG.getValueType(ObjectVT));
3976 
3977   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3978 }
3979 
3980 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
3981     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3982     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3983     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3984   // TODO: add description of PPC stack frame format, or at least some docs.
3985   //
3986   bool isELFv2ABI = Subtarget.isELFv2ABI();
3987   bool isLittleEndian = Subtarget.isLittleEndian();
3988   MachineFunction &MF = DAG.getMachineFunction();
3989   MachineFrameInfo &MFI = MF.getFrameInfo();
3990   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3991 
3992   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3993          "fastcc not supported on varargs functions");
3994 
3995   EVT PtrVT = getPointerTy(MF.getDataLayout());
3996   // Potential tail calls could cause overwriting of argument stack slots.
3997   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3998                        (CallConv == CallingConv::Fast));
3999   unsigned PtrByteSize = 8;
4000   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4001 
4002   static const MCPhysReg GPR[] = {
4003     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4004     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4005   };
4006   static const MCPhysReg VR[] = {
4007     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4008     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4009   };
4010 
4011   const unsigned Num_GPR_Regs = array_lengthof(GPR);
4012   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4013   const unsigned Num_VR_Regs  = array_lengthof(VR);
4014 
4015   // Do a first pass over the arguments to determine whether the ABI
4016   // guarantees that our caller has allocated the parameter save area
4017   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4018   // in the ELFv2 ABI, it is true if this is a vararg function or if
4019   // any parameter is located in a stack slot.
4020 
4021   bool HasParameterArea = !isELFv2ABI || isVarArg;
4022   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4023   unsigned NumBytes = LinkageSize;
4024   unsigned AvailableFPRs = Num_FPR_Regs;
4025   unsigned AvailableVRs = Num_VR_Regs;
4026   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4027     if (Ins[i].Flags.isNest())
4028       continue;
4029 
4030     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4031                                PtrByteSize, LinkageSize, ParamAreaSize,
4032                                NumBytes, AvailableFPRs, AvailableVRs))
4033       HasParameterArea = true;
4034   }
4035 
4036   // Add DAG nodes to load the arguments or copy them out of registers.  On
4037   // entry to a function on PPC, the arguments start after the linkage area,
4038   // although the first ones are often in registers.
4039 
4040   unsigned ArgOffset = LinkageSize;
4041   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4042   SmallVector<SDValue, 8> MemOps;
4043   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4044   unsigned CurArgIdx = 0;
4045   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4046     SDValue ArgVal;
4047     bool needsLoad = false;
4048     EVT ObjectVT = Ins[ArgNo].VT;
4049     EVT OrigVT = Ins[ArgNo].ArgVT;
4050     unsigned ObjSize = ObjectVT.getStoreSize();
4051     unsigned ArgSize = ObjSize;
4052     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4053     if (Ins[ArgNo].isOrigArg()) {
4054       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4055       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4056     }
4057     // We re-align the argument offset for each argument, except when using the
4058     // fast calling convention, when we need to make sure we do that only when
4059     // we'll actually use a stack slot.
4060     unsigned CurArgOffset;
4061     Align Alignment;
4062     auto ComputeArgOffset = [&]() {
4063       /* Respect alignment of argument on the stack.  */
4064       Alignment =
4065           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4066       ArgOffset = alignTo(ArgOffset, Alignment);
4067       CurArgOffset = ArgOffset;
4068     };
4069 
4070     if (CallConv != CallingConv::Fast) {
4071       ComputeArgOffset();
4072 
4073       /* Compute GPR index associated with argument offset.  */
4074       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4075       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4076     }
4077 
4078     // FIXME the codegen can be much improved in some cases.
4079     // We do not have to keep everything in memory.
4080     if (Flags.isByVal()) {
4081       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4082 
4083       if (CallConv == CallingConv::Fast)
4084         ComputeArgOffset();
4085 
4086       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4087       ObjSize = Flags.getByValSize();
4088       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4089       // Empty aggregate parameters do not take up registers.  Examples:
4090       //   struct { } a;
4091       //   union  { } b;
4092       //   int c[0];
4093       // etc.  However, we have to provide a place-holder in InVals, so
4094       // pretend we have an 8-byte item at the current address for that
4095       // purpose.
4096       if (!ObjSize) {
4097         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4098         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4099         InVals.push_back(FIN);
4100         continue;
4101       }
4102 
4103       // Create a stack object covering all stack doublewords occupied
4104       // by the argument.  If the argument is (fully or partially) on
4105       // the stack, or if the argument is fully in registers but the
4106       // caller has allocated the parameter save anyway, we can refer
4107       // directly to the caller's stack frame.  Otherwise, create a
4108       // local copy in our own frame.
4109       int FI;
4110       if (HasParameterArea ||
4111           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4112         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4113       else
4114         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4115       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4116 
4117       // Handle aggregates smaller than 8 bytes.
4118       if (ObjSize < PtrByteSize) {
4119         // The value of the object is its address, which differs from the
4120         // address of the enclosing doubleword on big-endian systems.
4121         SDValue Arg = FIN;
4122         if (!isLittleEndian) {
4123           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4124           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4125         }
4126         InVals.push_back(Arg);
4127 
4128         if (GPR_idx != Num_GPR_Regs) {
4129           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4130           FuncInfo->addLiveInAttr(VReg, Flags);
4131           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4132           SDValue Store;
4133 
4134           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4135             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4136                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4137             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4138                                       MachinePointerInfo(&*FuncArg), ObjType);
4139           } else {
4140             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4141             // store the whole register as-is to the parameter save area
4142             // slot.
4143             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4144                                  MachinePointerInfo(&*FuncArg));
4145           }
4146 
4147           MemOps.push_back(Store);
4148         }
4149         // Whether we copied from a register or not, advance the offset
4150         // into the parameter save area by a full doubleword.
4151         ArgOffset += PtrByteSize;
4152         continue;
4153       }
4154 
4155       // The value of the object is its address, which is the address of
4156       // its first stack doubleword.
4157       InVals.push_back(FIN);
4158 
4159       // Store whatever pieces of the object are in registers to memory.
4160       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4161         if (GPR_idx == Num_GPR_Regs)
4162           break;
4163 
4164         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4165         FuncInfo->addLiveInAttr(VReg, Flags);
4166         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4167         SDValue Addr = FIN;
4168         if (j) {
4169           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4170           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4171         }
4172         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4173                                      MachinePointerInfo(&*FuncArg, j));
4174         MemOps.push_back(Store);
4175         ++GPR_idx;
4176       }
4177       ArgOffset += ArgSize;
4178       continue;
4179     }
4180 
4181     switch (ObjectVT.getSimpleVT().SimpleTy) {
4182     default: llvm_unreachable("Unhandled argument type!");
4183     case MVT::i1:
4184     case MVT::i32:
4185     case MVT::i64:
4186       if (Flags.isNest()) {
4187         // The 'nest' parameter, if any, is passed in R11.
4188         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4189         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4190 
4191         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4192           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4193 
4194         break;
4195       }
4196 
4197       // These can be scalar arguments or elements of an integer array type
4198       // passed directly.  Clang may use those instead of "byval" aggregate
4199       // types to avoid forcing arguments to memory unnecessarily.
4200       if (GPR_idx != Num_GPR_Regs) {
4201         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4202         FuncInfo->addLiveInAttr(VReg, Flags);
4203         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4204 
4205         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4206           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4207           // value to MVT::i64 and then truncate to the correct register size.
4208           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4209       } else {
4210         if (CallConv == CallingConv::Fast)
4211           ComputeArgOffset();
4212 
4213         needsLoad = true;
4214         ArgSize = PtrByteSize;
4215       }
4216       if (CallConv != CallingConv::Fast || needsLoad)
4217         ArgOffset += 8;
4218       break;
4219 
4220     case MVT::f32:
4221     case MVT::f64:
4222       // These can be scalar arguments or elements of a float array type
4223       // passed directly.  The latter are used to implement ELFv2 homogenous
4224       // float aggregates.
4225       if (FPR_idx != Num_FPR_Regs) {
4226         unsigned VReg;
4227 
4228         if (ObjectVT == MVT::f32)
4229           VReg = MF.addLiveIn(FPR[FPR_idx],
4230                               Subtarget.hasP8Vector()
4231                                   ? &PPC::VSSRCRegClass
4232                                   : &PPC::F4RCRegClass);
4233         else
4234           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4235                                                 ? &PPC::VSFRCRegClass
4236                                                 : &PPC::F8RCRegClass);
4237 
4238         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4239         ++FPR_idx;
4240       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4241         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4242         // once we support fp <-> gpr moves.
4243 
4244         // This can only ever happen in the presence of f32 array types,
4245         // since otherwise we never run out of FPRs before running out
4246         // of GPRs.
4247         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4248         FuncInfo->addLiveInAttr(VReg, Flags);
4249         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4250 
4251         if (ObjectVT == MVT::f32) {
4252           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4253             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4254                                  DAG.getConstant(32, dl, MVT::i32));
4255           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4256         }
4257 
4258         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4259       } else {
4260         if (CallConv == CallingConv::Fast)
4261           ComputeArgOffset();
4262 
4263         needsLoad = true;
4264       }
4265 
4266       // When passing an array of floats, the array occupies consecutive
4267       // space in the argument area; only round up to the next doubleword
4268       // at the end of the array.  Otherwise, each float takes 8 bytes.
4269       if (CallConv != CallingConv::Fast || needsLoad) {
4270         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4271         ArgOffset += ArgSize;
4272         if (Flags.isInConsecutiveRegsLast())
4273           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4274       }
4275       break;
4276     case MVT::v4f32:
4277     case MVT::v4i32:
4278     case MVT::v8i16:
4279     case MVT::v16i8:
4280     case MVT::v2f64:
4281     case MVT::v2i64:
4282     case MVT::v1i128:
4283     case MVT::f128:
4284       // These can be scalar arguments or elements of a vector array type
4285       // passed directly.  The latter are used to implement ELFv2 homogenous
4286       // vector aggregates.
4287       if (VR_idx != Num_VR_Regs) {
4288         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4289         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4290         ++VR_idx;
4291       } else {
4292         if (CallConv == CallingConv::Fast)
4293           ComputeArgOffset();
4294         needsLoad = true;
4295       }
4296       if (CallConv != CallingConv::Fast || needsLoad)
4297         ArgOffset += 16;
4298       break;
4299     }
4300 
4301     // We need to load the argument to a virtual register if we determined
4302     // above that we ran out of physical registers of the appropriate type.
4303     if (needsLoad) {
4304       if (ObjSize < ArgSize && !isLittleEndian)
4305         CurArgOffset += ArgSize - ObjSize;
4306       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4307       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4308       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4309     }
4310 
4311     InVals.push_back(ArgVal);
4312   }
4313 
4314   // Area that is at least reserved in the caller of this function.
4315   unsigned MinReservedArea;
4316   if (HasParameterArea)
4317     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4318   else
4319     MinReservedArea = LinkageSize;
4320 
4321   // Set the size that is at least reserved in caller of this function.  Tail
4322   // call optimized functions' reserved stack space needs to be aligned so that
4323   // taking the difference between two stack areas will result in an aligned
4324   // stack.
4325   MinReservedArea =
4326       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4327   FuncInfo->setMinReservedArea(MinReservedArea);
4328 
4329   // If the function takes variable number of arguments, make a frame index for
4330   // the start of the first vararg value... for expansion of llvm.va_start.
4331   // On ELFv2ABI spec, it writes:
4332   // C programs that are intended to be *portable* across different compilers
4333   // and architectures must use the header file <stdarg.h> to deal with variable
4334   // argument lists.
4335   if (isVarArg && MFI.hasVAStart()) {
4336     int Depth = ArgOffset;
4337 
4338     FuncInfo->setVarArgsFrameIndex(
4339       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4340     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4341 
4342     // If this function is vararg, store any remaining integer argument regs
4343     // to their spots on the stack so that they may be loaded by dereferencing
4344     // the result of va_next.
4345     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4346          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4347       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4348       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4349       SDValue Store =
4350           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4351       MemOps.push_back(Store);
4352       // Increment the address by four for the next argument to store
4353       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4354       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4355     }
4356   }
4357 
4358   if (!MemOps.empty())
4359     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4360 
4361   return Chain;
4362 }
4363 
4364 SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
4365     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4366     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4367     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4368   // TODO: add description of PPC stack frame format, or at least some docs.
4369   //
4370   MachineFunction &MF = DAG.getMachineFunction();
4371   MachineFrameInfo &MFI = MF.getFrameInfo();
4372   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4373 
4374   EVT PtrVT = getPointerTy(MF.getDataLayout());
4375   bool isPPC64 = PtrVT == MVT::i64;
4376   // Potential tail calls could cause overwriting of argument stack slots.
4377   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4378                        (CallConv == CallingConv::Fast));
4379   unsigned PtrByteSize = isPPC64 ? 8 : 4;
4380   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4381   unsigned ArgOffset = LinkageSize;
4382   // Area that is at least reserved in caller of this function.
4383   unsigned MinReservedArea = ArgOffset;
4384 
4385   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
4386     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4387     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4388   };
4389   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
4390     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4391     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4392   };
4393   static const MCPhysReg VR[] = {
4394     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4395     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4396   };
4397 
4398   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
4399   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4400   const unsigned Num_VR_Regs  = array_lengthof( VR);
4401 
4402   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4403 
4404   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4405 
4406   // In 32-bit non-varargs functions, the stack space for vectors is after the
4407   // stack space for non-vectors.  We do not use this space unless we have
4408   // too many vectors to fit in registers, something that only occurs in
4409   // constructed examples:), but we have to walk the arglist to figure
4410   // that out...for the pathological case, compute VecArgOffset as the
4411   // start of the vector parameter area.  Computing VecArgOffset is the
4412   // entire point of the following loop.
4413   unsigned VecArgOffset = ArgOffset;
4414   if (!isVarArg && !isPPC64) {
4415     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
4416          ++ArgNo) {
4417       EVT ObjectVT = Ins[ArgNo].VT;
4418       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4419 
4420       if (Flags.isByVal()) {
4421         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4422         unsigned ObjSize = Flags.getByValSize();
4423         unsigned ArgSize =
4424                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4425         VecArgOffset += ArgSize;
4426         continue;
4427       }
4428 
4429       switch(ObjectVT.getSimpleVT().SimpleTy) {
4430       default: llvm_unreachable("Unhandled argument type!");
4431       case MVT::i1:
4432       case MVT::i32:
4433       case MVT::f32:
4434         VecArgOffset += 4;
4435         break;
4436       case MVT::i64:  // PPC64
4437       case MVT::f64:
4438         // FIXME: We are guaranteed to be !isPPC64 at this point.
4439         // Does MVT::i64 apply?
4440         VecArgOffset += 8;
4441         break;
4442       case MVT::v4f32:
4443       case MVT::v4i32:
4444       case MVT::v8i16:
4445       case MVT::v16i8:
4446         // Nothing to do, we're only looking at Nonvector args here.
4447         break;
4448       }
4449     }
4450   }
4451   // We've found where the vector parameter area in memory is.  Skip the
4452   // first 12 parameters; these don't use that memory.
4453   VecArgOffset = ((VecArgOffset+15)/16)*16;
4454   VecArgOffset += 12*16;
4455 
4456   // Add DAG nodes to load the arguments or copy them out of registers.  On
4457   // entry to a function on PPC, the arguments start after the linkage area,
4458   // although the first ones are often in registers.
4459 
4460   SmallVector<SDValue, 8> MemOps;
4461   unsigned nAltivecParamsAtEnd = 0;
4462   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4463   unsigned CurArgIdx = 0;
4464   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4465     SDValue ArgVal;
4466     bool needsLoad = false;
4467     EVT ObjectVT = Ins[ArgNo].VT;
4468     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
4469     unsigned ArgSize = ObjSize;
4470     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4471     if (Ins[ArgNo].isOrigArg()) {
4472       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4473       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4474     }
4475     unsigned CurArgOffset = ArgOffset;
4476 
4477     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4478     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
4479         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
4480       if (isVarArg || isPPC64) {
4481         MinReservedArea = ((MinReservedArea+15)/16)*16;
4482         MinReservedArea += CalculateStackSlotSize(ObjectVT,
4483                                                   Flags,
4484                                                   PtrByteSize);
4485       } else  nAltivecParamsAtEnd++;
4486     } else
4487       // Calculate min reserved area.
4488       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
4489                                                 Flags,
4490                                                 PtrByteSize);
4491 
4492     // FIXME the codegen can be much improved in some cases.
4493     // We do not have to keep everything in memory.
4494     if (Flags.isByVal()) {
4495       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4496 
4497       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4498       ObjSize = Flags.getByValSize();
4499       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4500       // Objects of size 1 and 2 are right justified, everything else is
4501       // left justified.  This means the memory address is adjusted forwards.
4502       if (ObjSize==1 || ObjSize==2) {
4503         CurArgOffset = CurArgOffset + (4 - ObjSize);
4504       }
4505       // The value of the object is its address.
4506       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
4507       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4508       InVals.push_back(FIN);
4509       if (ObjSize==1 || ObjSize==2) {
4510         if (GPR_idx != Num_GPR_Regs) {
4511           unsigned VReg;
4512           if (isPPC64)
4513             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4514           else
4515             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4516           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4517           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
4518           SDValue Store =
4519               DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
4520                                 MachinePointerInfo(&*FuncArg), ObjType);
4521           MemOps.push_back(Store);
4522           ++GPR_idx;
4523         }
4524 
4525         ArgOffset += PtrByteSize;
4526 
4527         continue;
4528       }
4529       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4530         // Store whatever pieces of the object are in registers
4531         // to memory.  ArgOffset will be the address of the beginning
4532         // of the object.
4533         if (GPR_idx != Num_GPR_Regs) {
4534           unsigned VReg;
4535           if (isPPC64)
4536             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4537           else
4538             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4539           int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4540           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4541           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4542           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4543                                        MachinePointerInfo(&*FuncArg, j));
4544           MemOps.push_back(Store);
4545           ++GPR_idx;
4546           ArgOffset += PtrByteSize;
4547         } else {
4548           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
4549           break;
4550         }
4551       }
4552       continue;
4553     }
4554 
4555     switch (ObjectVT.getSimpleVT().SimpleTy) {
4556     default: llvm_unreachable("Unhandled argument type!");
4557     case MVT::i1:
4558     case MVT::i32:
4559       if (!isPPC64) {
4560         if (GPR_idx != Num_GPR_Regs) {
4561           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4562           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4563 
4564           if (ObjectVT == MVT::i1)
4565             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
4566 
4567           ++GPR_idx;
4568         } else {
4569           needsLoad = true;
4570           ArgSize = PtrByteSize;
4571         }
4572         // All int arguments reserve stack space in the Darwin ABI.
4573         ArgOffset += PtrByteSize;
4574         break;
4575       }
4576       LLVM_FALLTHROUGH;
4577     case MVT::i64:  // PPC64
4578       if (GPR_idx != Num_GPR_Regs) {
4579         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4580         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4581 
4582         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4583           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4584           // value to MVT::i64 and then truncate to the correct register size.
4585           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4586 
4587         ++GPR_idx;
4588       } else {
4589         needsLoad = true;
4590         ArgSize = PtrByteSize;
4591       }
4592       // All int arguments reserve stack space in the Darwin ABI.
4593       ArgOffset += 8;
4594       break;
4595 
4596     case MVT::f32:
4597     case MVT::f64:
4598       // Every 4 bytes of argument space consumes one of the GPRs available for
4599       // argument passing.
4600       if (GPR_idx != Num_GPR_Regs) {
4601         ++GPR_idx;
4602         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
4603           ++GPR_idx;
4604       }
4605       if (FPR_idx != Num_FPR_Regs) {
4606         unsigned VReg;
4607 
4608         if (ObjectVT == MVT::f32)
4609           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
4610         else
4611           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
4612 
4613         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4614         ++FPR_idx;
4615       } else {
4616         needsLoad = true;
4617       }
4618 
4619       // All FP arguments reserve stack space in the Darwin ABI.
4620       ArgOffset += isPPC64 ? 8 : ObjSize;
4621       break;
4622     case MVT::v4f32:
4623     case MVT::v4i32:
4624     case MVT::v8i16:
4625     case MVT::v16i8:
4626       // Note that vector arguments in registers don't reserve stack space,
4627       // except in varargs functions.
4628       if (VR_idx != Num_VR_Regs) {
4629         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4630         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4631         if (isVarArg) {
4632           while ((ArgOffset % 16) != 0) {
4633             ArgOffset += PtrByteSize;
4634             if (GPR_idx != Num_GPR_Regs)
4635               GPR_idx++;
4636           }
4637           ArgOffset += 16;
4638           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
4639         }
4640         ++VR_idx;
4641       } else {
4642         if (!isVarArg && !isPPC64) {
4643           // Vectors go after all the nonvectors.
4644           CurArgOffset = VecArgOffset;
4645           VecArgOffset += 16;
4646         } else {
4647           // Vectors are aligned.
4648           ArgOffset = ((ArgOffset+15)/16)*16;
4649           CurArgOffset = ArgOffset;
4650           ArgOffset += 16;
4651         }
4652         needsLoad = true;
4653       }
4654       break;
4655     }
4656 
4657     // We need to load the argument to a virtual register if we determined above
4658     // that we ran out of physical registers of the appropriate type.
4659     if (needsLoad) {
4660       int FI = MFI.CreateFixedObject(ObjSize,
4661                                      CurArgOffset + (ArgSize - ObjSize),
4662                                      isImmutable);
4663       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4664       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4665     }
4666 
4667     InVals.push_back(ArgVal);
4668   }
4669 
4670   // Allow for Altivec parameters at the end, if needed.
4671   if (nAltivecParamsAtEnd) {
4672     MinReservedArea = ((MinReservedArea+15)/16)*16;
4673     MinReservedArea += 16*nAltivecParamsAtEnd;
4674   }
4675 
4676   // Area that is at least reserved in the caller of this function.
4677   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
4678 
4679   // Set the size that is at least reserved in caller of this function.  Tail
4680   // call optimized functions' reserved stack space needs to be aligned so that
4681   // taking the difference between two stack areas will result in an aligned
4682   // stack.
4683   MinReservedArea =
4684       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4685   FuncInfo->setMinReservedArea(MinReservedArea);
4686 
4687   // If the function takes variable number of arguments, make a frame index for
4688   // the start of the first vararg value... for expansion of llvm.va_start.
4689   if (isVarArg) {
4690     int Depth = ArgOffset;
4691 
4692     FuncInfo->setVarArgsFrameIndex(
4693       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4694                             Depth, true));
4695     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4696 
4697     // If this function is vararg, store any remaining integer argument regs
4698     // to their spots on the stack so that they may be loaded by dereferencing
4699     // the result of va_next.
4700     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
4701       unsigned VReg;
4702 
4703       if (isPPC64)
4704         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4705       else
4706         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
4707 
4708       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4709       SDValue Store =
4710           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4711       MemOps.push_back(Store);
4712       // Increment the address by four for the next argument to store
4713       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4714       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4715     }
4716   }
4717 
4718   if (!MemOps.empty())
4719     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4720 
4721   return Chain;
4722 }
4723 
4724 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4725 /// adjusted to accommodate the arguments for the tailcall.
4726 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4727                                    unsigned ParamSize) {
4728 
4729   if (!isTailCall) return 0;
4730 
4731   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4732   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4733   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4734   // Remember only if the new adjustment is bigger.
4735   if (SPDiff < FI->getTailCallSPDelta())
4736     FI->setTailCallSPDelta(SPDiff);
4737 
4738   return SPDiff;
4739 }
4740 
4741 static bool isFunctionGlobalAddress(SDValue Callee);
4742 
4743 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4744                               const TargetMachine &TM) {
4745   // It does not make sense to call callsShareTOCBase() with a caller that
4746   // is PC Relative since PC Relative callers do not have a TOC.
4747 #ifndef NDEBUG
4748   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4749   assert(!STICaller->isUsingPCRelativeCalls() &&
4750          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4751 #endif
4752 
4753   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4754   // don't have enough information to determine if the caller and callee share
4755   // the same  TOC base, so we have to pessimistically assume they don't for
4756   // correctness.
4757   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4758   if (!G)
4759     return false;
4760 
4761   const GlobalValue *GV = G->getGlobal();
4762 
4763   // If the callee is preemptable, then the static linker will use a plt-stub
4764   // which saves the toc to the stack, and needs a nop after the call
4765   // instruction to convert to a toc-restore.
4766   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4767     return false;
4768 
4769   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4770   // We may need a TOC restore in the situation where the caller requires a
4771   // valid TOC but the callee is PC Relative and does not.
4772   const Function *F = dyn_cast<Function>(GV);
4773   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4774 
4775   // If we have an Alias we can try to get the function from there.
4776   if (Alias) {
4777     const GlobalObject *GlobalObj = Alias->getBaseObject();
4778     F = dyn_cast<Function>(GlobalObj);
4779   }
4780 
4781   // If we still have no valid function pointer we do not have enough
4782   // information to determine if the callee uses PC Relative calls so we must
4783   // assume that it does.
4784   if (!F)
4785     return false;
4786 
4787   // If the callee uses PC Relative we cannot guarantee that the callee won't
4788   // clobber the TOC of the caller and so we must assume that the two
4789   // functions do not share a TOC base.
4790   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4791   if (STICallee->isUsingPCRelativeCalls())
4792     return false;
4793 
4794   // The medium and large code models are expected to provide a sufficiently
4795   // large TOC to provide all data addressing needs of a module with a
4796   // single TOC.
4797   if (CodeModel::Medium == TM.getCodeModel() ||
4798       CodeModel::Large == TM.getCodeModel())
4799     return true;
4800 
4801   // Otherwise we need to ensure callee and caller are in the same section,
4802   // since the linker may allocate multiple TOCs, and we don't know which
4803   // sections will belong to the same TOC base.
4804   if (!GV->isStrongDefinitionForLinker())
4805     return false;
4806 
4807   // Any explicitly-specified sections and section prefixes must also match.
4808   // Also, if we're using -ffunction-sections, then each function is always in
4809   // a different section (the same is true for COMDAT functions).
4810   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4811       GV->getSection() != Caller->getSection())
4812     return false;
4813   if (const auto *F = dyn_cast<Function>(GV)) {
4814     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4815       return false;
4816   }
4817 
4818   return true;
4819 }
4820 
4821 static bool
4822 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4823                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4824   assert(Subtarget.is64BitELFABI());
4825 
4826   const unsigned PtrByteSize = 8;
4827   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4828 
4829   static const MCPhysReg GPR[] = {
4830     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4831     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4832   };
4833   static const MCPhysReg VR[] = {
4834     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4835     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4836   };
4837 
4838   const unsigned NumGPRs = array_lengthof(GPR);
4839   const unsigned NumFPRs = 13;
4840   const unsigned NumVRs = array_lengthof(VR);
4841   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4842 
4843   unsigned NumBytes = LinkageSize;
4844   unsigned AvailableFPRs = NumFPRs;
4845   unsigned AvailableVRs = NumVRs;
4846 
4847   for (const ISD::OutputArg& Param : Outs) {
4848     if (Param.Flags.isNest()) continue;
4849 
4850     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4851                                LinkageSize, ParamAreaSize, NumBytes,
4852                                AvailableFPRs, AvailableVRs))
4853       return true;
4854   }
4855   return false;
4856 }
4857 
4858 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4859   if (CB.arg_size() != CallerFn->arg_size())
4860     return false;
4861 
4862   auto CalleeArgIter = CB.arg_begin();
4863   auto CalleeArgEnd = CB.arg_end();
4864   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4865 
4866   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4867     const Value* CalleeArg = *CalleeArgIter;
4868     const Value* CallerArg = &(*CallerArgIter);
4869     if (CalleeArg == CallerArg)
4870       continue;
4871 
4872     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4873     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4874     //      }
4875     // 1st argument of callee is undef and has the same type as caller.
4876     if (CalleeArg->getType() == CallerArg->getType() &&
4877         isa<UndefValue>(CalleeArg))
4878       continue;
4879 
4880     return false;
4881   }
4882 
4883   return true;
4884 }
4885 
4886 // Returns true if TCO is possible between the callers and callees
4887 // calling conventions.
4888 static bool
4889 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4890                                     CallingConv::ID CalleeCC) {
4891   // Tail calls are possible with fastcc and ccc.
4892   auto isTailCallableCC  = [] (CallingConv::ID CC){
4893       return  CC == CallingConv::C || CC == CallingConv::Fast;
4894   };
4895   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4896     return false;
4897 
4898   // We can safely tail call both fastcc and ccc callees from a c calling
4899   // convention caller. If the caller is fastcc, we may have less stack space
4900   // than a non-fastcc caller with the same signature so disable tail-calls in
4901   // that case.
4902   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4903 }
4904 
4905 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4906     SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4907     const SmallVectorImpl<ISD::OutputArg> &Outs,
4908     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4909   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4910 
4911   if (DisableSCO && !TailCallOpt) return false;
4912 
4913   // Variadic argument functions are not supported.
4914   if (isVarArg) return false;
4915 
4916   auto &Caller = DAG.getMachineFunction().getFunction();
4917   // Check that the calling conventions are compatible for tco.
4918   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4919     return false;
4920 
4921   // Caller contains any byval parameter is not supported.
4922   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4923     return false;
4924 
4925   // Callee contains any byval parameter is not supported, too.
4926   // Note: This is a quick work around, because in some cases, e.g.
4927   // caller's stack size > callee's stack size, we are still able to apply
4928   // sibling call optimization. For example, gcc is able to do SCO for caller1
4929   // in the following example, but not for caller2.
4930   //   struct test {
4931   //     long int a;
4932   //     char ary[56];
4933   //   } gTest;
4934   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4935   //     b->a = v.a;
4936   //     return 0;
4937   //   }
4938   //   void caller1(struct test a, struct test c, struct test *b) {
4939   //     callee(gTest, b); }
4940   //   void caller2(struct test *b) { callee(gTest, b); }
4941   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4942     return false;
4943 
4944   // If callee and caller use different calling conventions, we cannot pass
4945   // parameters on stack since offsets for the parameter area may be different.
4946   if (Caller.getCallingConv() != CalleeCC &&
4947       needStackSlotPassParameters(Subtarget, Outs))
4948     return false;
4949 
4950   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4951   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4952   // callee potentially have different TOC bases then we cannot tail call since
4953   // we need to restore the TOC pointer after the call.
4954   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4955   // We cannot guarantee this for indirect calls or calls to external functions.
4956   // When PC-Relative addressing is used, the concept of the TOC is no longer
4957   // applicable so this check is not required.
4958   // Check first for indirect calls.
4959   if (!Subtarget.isUsingPCRelativeCalls() &&
4960       !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4961     return false;
4962 
4963   // Check if we share the TOC base.
4964   if (!Subtarget.isUsingPCRelativeCalls() &&
4965       !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4966     return false;
4967 
4968   // TCO allows altering callee ABI, so we don't have to check further.
4969   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4970     return true;
4971 
4972   if (DisableSCO) return false;
4973 
4974   // If callee use the same argument list that caller is using, then we can
4975   // apply SCO on this case. If it is not, then we need to check if callee needs
4976   // stack for passing arguments.
4977   // PC Relative tail calls may not have a CallBase.
4978   // If there is no CallBase we cannot verify if we have the same argument
4979   // list so assume that we don't have the same argument list.
4980   if (CB && !hasSameArgumentList(&Caller, *CB) &&
4981       needStackSlotPassParameters(Subtarget, Outs))
4982     return false;
4983   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4984     return false;
4985 
4986   return true;
4987 }
4988 
4989 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4990 /// for tail call optimization. Targets which want to do tail call
4991 /// optimization should implement this function.
4992 bool
4993 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4994                                                      CallingConv::ID CalleeCC,
4995                                                      bool isVarArg,
4996                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4997                                                      SelectionDAG& DAG) const {
4998   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4999     return false;
5000 
5001   // Variable argument functions are not supported.
5002   if (isVarArg)
5003     return false;
5004 
5005   MachineFunction &MF = DAG.getMachineFunction();
5006   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5007   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5008     // Functions containing by val parameters are not supported.
5009     for (unsigned i = 0; i != Ins.size(); i++) {
5010        ISD::ArgFlagsTy Flags = Ins[i].Flags;
5011        if (Flags.isByVal()) return false;
5012     }
5013 
5014     // Non-PIC/GOT tail calls are supported.
5015     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5016       return true;
5017 
5018     // At the moment we can only do local tail calls (in same module, hidden
5019     // or protected) if we are generating PIC.
5020     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
5021       return G->getGlobal()->hasHiddenVisibility()
5022           || G->getGlobal()->hasProtectedVisibility();
5023   }
5024 
5025   return false;
5026 }
5027 
5028 /// isCallCompatibleAddress - Return the immediate to use if the specified
5029 /// 32-bit value is representable in the immediate field of a BxA instruction.
5030 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5031   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5032   if (!C) return nullptr;
5033 
5034   int Addr = C->getZExtValue();
5035   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
5036       SignExtend32<26>(Addr) != Addr)
5037     return nullptr;  // Top 6 bits have to be sext of immediate.
5038 
5039   return DAG
5040       .getConstant(
5041           (int)C->getZExtValue() >> 2, SDLoc(Op),
5042           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5043       .getNode();
5044 }
5045 
5046 namespace {
5047 
5048 struct TailCallArgumentInfo {
5049   SDValue Arg;
5050   SDValue FrameIdxOp;
5051   int FrameIdx = 0;
5052 
5053   TailCallArgumentInfo() = default;
5054 };
5055 
5056 } // end anonymous namespace
5057 
5058 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5059 static void StoreTailCallArgumentsToStackSlot(
5060     SelectionDAG &DAG, SDValue Chain,
5061     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5062     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5063   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5064     SDValue Arg = TailCallArgs[i].Arg;
5065     SDValue FIN = TailCallArgs[i].FrameIdxOp;
5066     int FI = TailCallArgs[i].FrameIdx;
5067     // Store relative to framepointer.
5068     MemOpChains.push_back(DAG.getStore(
5069         Chain, dl, Arg, FIN,
5070         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5071   }
5072 }
5073 
5074 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5075 /// the appropriate stack slot for the tail call optimized function call.
5076 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5077                                              SDValue OldRetAddr, SDValue OldFP,
5078                                              int SPDiff, const SDLoc &dl) {
5079   if (SPDiff) {
5080     // Calculate the new stack slot for the return address.
5081     MachineFunction &MF = DAG.getMachineFunction();
5082     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5083     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5084     bool isPPC64 = Subtarget.isPPC64();
5085     int SlotSize = isPPC64 ? 8 : 4;
5086     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5087     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5088                                                          NewRetAddrLoc, true);
5089     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5090     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5091     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5092                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5093   }
5094   return Chain;
5095 }
5096 
5097 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5098 /// the position of the argument.
5099 static void
5100 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5101                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5102                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5103   int Offset = ArgOffset + SPDiff;
5104   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5105   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5106   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5107   SDValue FIN = DAG.getFrameIndex(FI, VT);
5108   TailCallArgumentInfo Info;
5109   Info.Arg = Arg;
5110   Info.FrameIdxOp = FIN;
5111   Info.FrameIdx = FI;
5112   TailCallArguments.push_back(Info);
5113 }
5114 
5115 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5116 /// stack slot. Returns the chain as result and the loaded frame pointers in
5117 /// LROpOut/FPOpout. Used when tail calling.
5118 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5119     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5120     SDValue &FPOpOut, const SDLoc &dl) const {
5121   if (SPDiff) {
5122     // Load the LR and FP stack slot for later adjusting.
5123     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5124     LROpOut = getReturnAddrFrameIndex(DAG);
5125     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5126     Chain = SDValue(LROpOut.getNode(), 1);
5127   }
5128   return Chain;
5129 }
5130 
5131 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5132 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5133 /// specified by the specific parameter attribute. The copy will be passed as
5134 /// a byval function parameter.
5135 /// Sometimes what we are copying is the end of a larger object, the part that
5136 /// does not fit in registers.
5137 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5138                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5139                                          SelectionDAG &DAG, const SDLoc &dl) {
5140   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5141   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5142                        Flags.getNonZeroByValAlign(), false, false, false,
5143                        MachinePointerInfo(), MachinePointerInfo());
5144 }
5145 
5146 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5147 /// tail calls.
5148 static void LowerMemOpCallTo(
5149     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5150     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5151     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5152     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5153   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5154   if (!isTailCall) {
5155     if (isVector) {
5156       SDValue StackPtr;
5157       if (isPPC64)
5158         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5159       else
5160         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5161       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5162                            DAG.getConstant(ArgOffset, dl, PtrVT));
5163     }
5164     MemOpChains.push_back(
5165         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5166     // Calculate and remember argument location.
5167   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5168                                   TailCallArguments);
5169 }
5170 
5171 static void
5172 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
5173                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5174                 SDValue FPOp,
5175                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5176   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5177   // might overwrite each other in case of tail call optimization.
5178   SmallVector<SDValue, 8> MemOpChains2;
5179   // Do not flag preceding copytoreg stuff together with the following stuff.
5180   InFlag = SDValue();
5181   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5182                                     MemOpChains2, dl);
5183   if (!MemOpChains2.empty())
5184     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5185 
5186   // Store the return address to the appropriate stack slot.
5187   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5188 
5189   // Emit callseq_end just before tailcall node.
5190   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5191                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
5192   InFlag = Chain.getValue(1);
5193 }
5194 
5195 // Is this global address that of a function that can be called by name? (as
5196 // opposed to something that must hold a descriptor for an indirect call).
5197 static bool isFunctionGlobalAddress(SDValue Callee) {
5198   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5199     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5200         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5201       return false;
5202 
5203     return G->getGlobal()->getValueType()->isFunctionTy();
5204   }
5205 
5206   return false;
5207 }
5208 
5209 SDValue PPCTargetLowering::LowerCallResult(
5210     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5211     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5212     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5213   SmallVector<CCValAssign, 16> RVLocs;
5214   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5215                     *DAG.getContext());
5216 
5217   CCRetInfo.AnalyzeCallResult(
5218       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5219                ? RetCC_PPC_Cold
5220                : RetCC_PPC);
5221 
5222   // Copy all of the result registers out of their specified physreg.
5223   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5224     CCValAssign &VA = RVLocs[i];
5225     assert(VA.isRegLoc() && "Can only return in registers!");
5226 
5227     SDValue Val;
5228 
5229     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5230       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5231                                       InFlag);
5232       Chain = Lo.getValue(1);
5233       InFlag = Lo.getValue(2);
5234       VA = RVLocs[++i]; // skip ahead to next loc
5235       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5236                                       InFlag);
5237       Chain = Hi.getValue(1);
5238       InFlag = Hi.getValue(2);
5239       if (!Subtarget.isLittleEndian())
5240         std::swap (Lo, Hi);
5241       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5242     } else {
5243       Val = DAG.getCopyFromReg(Chain, dl,
5244                                VA.getLocReg(), VA.getLocVT(), InFlag);
5245       Chain = Val.getValue(1);
5246       InFlag = Val.getValue(2);
5247     }
5248 
5249     switch (VA.getLocInfo()) {
5250     default: llvm_unreachable("Unknown loc info!");
5251     case CCValAssign::Full: break;
5252     case CCValAssign::AExt:
5253       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5254       break;
5255     case CCValAssign::ZExt:
5256       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5257                         DAG.getValueType(VA.getValVT()));
5258       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5259       break;
5260     case CCValAssign::SExt:
5261       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5262                         DAG.getValueType(VA.getValVT()));
5263       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5264       break;
5265     }
5266 
5267     InVals.push_back(Val);
5268   }
5269 
5270   return Chain;
5271 }
5272 
5273 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5274                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5275   // PatchPoint calls are not indirect.
5276   if (isPatchPoint)
5277     return false;
5278 
5279   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
5280     return false;
5281 
5282   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5283   // becuase the immediate function pointer points to a descriptor instead of
5284   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5285   // pointer immediate points to the global entry point, while the BLA would
5286   // need to jump to the local entry point (see rL211174).
5287   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5288       isBLACompatibleAddress(Callee, DAG))
5289     return false;
5290 
5291   return true;
5292 }
5293 
5294 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5295 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5296   return Subtarget.isAIXABI() ||
5297          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5298 }
5299 
5300 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5301                               const Function &Caller,
5302                               const SDValue &Callee,
5303                               const PPCSubtarget &Subtarget,
5304                               const TargetMachine &TM) {
5305   if (CFlags.IsTailCall)
5306     return PPCISD::TC_RETURN;
5307 
5308   // This is a call through a function pointer.
5309   if (CFlags.IsIndirect) {
5310     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5311     // indirect calls. The save of the caller's TOC pointer to the stack will be
5312     // inserted into the DAG as part of call lowering. The restore of the TOC
5313     // pointer is modeled by using a pseudo instruction for the call opcode that
5314     // represents the 2 instruction sequence of an indirect branch and link,
5315     // immediately followed by a load of the TOC pointer from the the stack save
5316     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5317     // as it is not saved or used.
5318     return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5319                                                : PPCISD::BCTRL;
5320   }
5321 
5322   if (Subtarget.isUsingPCRelativeCalls()) {
5323     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5324     return PPCISD::CALL_NOTOC;
5325   }
5326 
5327   // The ABIs that maintain a TOC pointer accross calls need to have a nop
5328   // immediately following the call instruction if the caller and callee may
5329   // have different TOC bases. At link time if the linker determines the calls
5330   // may not share a TOC base, the call is redirected to a trampoline inserted
5331   // by the linker. The trampoline will (among other things) save the callers
5332   // TOC pointer at an ABI designated offset in the linkage area and the linker
5333   // will rewrite the nop to be a load of the TOC pointer from the linkage area
5334   // into gpr2.
5335   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5336     return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5337                                                   : PPCISD::CALL_NOP;
5338 
5339   return PPCISD::CALL;
5340 }
5341 
5342 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5343                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5344   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5345     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5346       return SDValue(Dest, 0);
5347 
5348   // Returns true if the callee is local, and false otherwise.
5349   auto isLocalCallee = [&]() {
5350     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5351     const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5352     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5353 
5354     return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5355            !dyn_cast_or_null<GlobalIFunc>(GV);
5356   };
5357 
5358   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5359   // a static relocation model causes some versions of GNU LD (2.17.50, at
5360   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5361   // built with secure-PLT.
5362   bool UsePlt =
5363       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5364       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5365 
5366   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5367     const TargetMachine &TM = Subtarget.getTargetMachine();
5368     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5369     MCSymbolXCOFF *S =
5370         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5371 
5372     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5373     return DAG.getMCSymbol(S, PtrVT);
5374   };
5375 
5376   if (isFunctionGlobalAddress(Callee)) {
5377     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5378 
5379     if (Subtarget.isAIXABI()) {
5380       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5381       return getAIXFuncEntryPointSymbolSDNode(GV);
5382     }
5383     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5384                                       UsePlt ? PPCII::MO_PLT : 0);
5385   }
5386 
5387   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5388     const char *SymName = S->getSymbol();
5389     if (Subtarget.isAIXABI()) {
5390       // If there exists a user-declared function whose name is the same as the
5391       // ExternalSymbol's, then we pick up the user-declared version.
5392       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5393       if (const Function *F =
5394               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5395         return getAIXFuncEntryPointSymbolSDNode(F);
5396 
5397       // On AIX, direct function calls reference the symbol for the function's
5398       // entry point, which is named by prepending a "." before the function's
5399       // C-linkage name. A Qualname is returned here because an external
5400       // function entry point is a csect with XTY_ER property.
5401       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5402         auto &Context = DAG.getMachineFunction().getMMI().getContext();
5403         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5404             (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5405             SectionKind::getMetadata());
5406         return Sec->getQualNameSymbol();
5407       };
5408 
5409       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5410     }
5411     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5412                                        UsePlt ? PPCII::MO_PLT : 0);
5413   }
5414 
5415   // No transformation needed.
5416   assert(Callee.getNode() && "What no callee?");
5417   return Callee;
5418 }
5419 
5420 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5421   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5422          "Expected a CALLSEQ_STARTSDNode.");
5423 
5424   // The last operand is the chain, except when the node has glue. If the node
5425   // has glue, then the last operand is the glue, and the chain is the second
5426   // last operand.
5427   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5428   if (LastValue.getValueType() != MVT::Glue)
5429     return LastValue;
5430 
5431   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5432 }
5433 
5434 // Creates the node that moves a functions address into the count register
5435 // to prepare for an indirect call instruction.
5436 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5437                                 SDValue &Glue, SDValue &Chain,
5438                                 const SDLoc &dl) {
5439   SDValue MTCTROps[] = {Chain, Callee, Glue};
5440   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5441   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5442                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5443   // The glue is the second value produced.
5444   Glue = Chain.getValue(1);
5445 }
5446 
5447 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5448                                           SDValue &Glue, SDValue &Chain,
5449                                           SDValue CallSeqStart,
5450                                           const CallBase *CB, const SDLoc &dl,
5451                                           bool hasNest,
5452                                           const PPCSubtarget &Subtarget) {
5453   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5454   // entry point, but to the function descriptor (the function entry point
5455   // address is part of the function descriptor though).
5456   // The function descriptor is a three doubleword structure with the
5457   // following fields: function entry point, TOC base address and
5458   // environment pointer.
5459   // Thus for a call through a function pointer, the following actions need
5460   // to be performed:
5461   //   1. Save the TOC of the caller in the TOC save area of its stack
5462   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5463   //   2. Load the address of the function entry point from the function
5464   //      descriptor.
5465   //   3. Load the TOC of the callee from the function descriptor into r2.
5466   //   4. Load the environment pointer from the function descriptor into
5467   //      r11.
5468   //   5. Branch to the function entry point address.
5469   //   6. On return of the callee, the TOC of the caller needs to be
5470   //      restored (this is done in FinishCall()).
5471   //
5472   // The loads are scheduled at the beginning of the call sequence, and the
5473   // register copies are flagged together to ensure that no other
5474   // operations can be scheduled in between. E.g. without flagging the
5475   // copies together, a TOC access in the caller could be scheduled between
5476   // the assignment of the callee TOC and the branch to the callee, which leads
5477   // to incorrect code.
5478 
5479   // Start by loading the function address from the descriptor.
5480   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5481   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5482                       ? (MachineMemOperand::MODereferenceable |
5483                          MachineMemOperand::MOInvariant)
5484                       : MachineMemOperand::MONone;
5485 
5486   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5487 
5488   // Registers used in building the DAG.
5489   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5490   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5491 
5492   // Offsets of descriptor members.
5493   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5494   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5495 
5496   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5497   const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5498 
5499   // One load for the functions entry point address.
5500   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5501                                     Alignment, MMOFlags);
5502 
5503   // One for loading the TOC anchor for the module that contains the called
5504   // function.
5505   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5506   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5507   SDValue TOCPtr =
5508       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5509                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5510 
5511   // One for loading the environment pointer.
5512   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5513   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5514   SDValue LoadEnvPtr =
5515       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5516                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5517 
5518 
5519   // Then copy the newly loaded TOC anchor to the TOC pointer.
5520   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5521   Chain = TOCVal.getValue(0);
5522   Glue = TOCVal.getValue(1);
5523 
5524   // If the function call has an explicit 'nest' parameter, it takes the
5525   // place of the environment pointer.
5526   assert((!hasNest || !Subtarget.isAIXABI()) &&
5527          "Nest parameter is not supported on AIX.");
5528   if (!hasNest) {
5529     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5530     Chain = EnvVal.getValue(0);
5531     Glue = EnvVal.getValue(1);
5532   }
5533 
5534   // The rest of the indirect call sequence is the same as the non-descriptor
5535   // DAG.
5536   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5537 }
5538 
5539 static void
5540 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5541                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5542                   SelectionDAG &DAG,
5543                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5544                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5545                   const PPCSubtarget &Subtarget) {
5546   const bool IsPPC64 = Subtarget.isPPC64();
5547   // MVT for a general purpose register.
5548   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5549 
5550   // First operand is always the chain.
5551   Ops.push_back(Chain);
5552 
5553   // If it's a direct call pass the callee as the second operand.
5554   if (!CFlags.IsIndirect)
5555     Ops.push_back(Callee);
5556   else {
5557     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5558 
5559     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5560     // on the stack (this would have been done in `LowerCall_64SVR4` or
5561     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5562     // represents both the indirect branch and a load that restores the TOC
5563     // pointer from the linkage area. The operand for the TOC restore is an add
5564     // of the TOC save offset to the stack pointer. This must be the second
5565     // operand: after the chain input but before any other variadic arguments.
5566     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5567     // saved or used.
5568     if (isTOCSaveRestoreRequired(Subtarget)) {
5569       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5570 
5571       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5572       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5573       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5574       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5575       Ops.push_back(AddTOC);
5576     }
5577 
5578     // Add the register used for the environment pointer.
5579     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5580       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5581                                     RegVT));
5582 
5583 
5584     // Add CTR register as callee so a bctr can be emitted later.
5585     if (CFlags.IsTailCall)
5586       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5587   }
5588 
5589   // If this is a tail call add stack pointer delta.
5590   if (CFlags.IsTailCall)
5591     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5592 
5593   // Add argument registers to the end of the list so that they are known live
5594   // into the call.
5595   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5596     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5597                                   RegsToPass[i].second.getValueType()));
5598 
5599   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5600   // no way to mark dependencies as implicit here.
5601   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5602   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5603        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5604     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5605 
5606   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5607   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5608     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5609 
5610   // Add a register mask operand representing the call-preserved registers.
5611   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5612   const uint32_t *Mask =
5613       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5614   assert(Mask && "Missing call preserved mask for calling convention");
5615   Ops.push_back(DAG.getRegisterMask(Mask));
5616 
5617   // If the glue is valid, it is the last operand.
5618   if (Glue.getNode())
5619     Ops.push_back(Glue);
5620 }
5621 
5622 SDValue PPCTargetLowering::FinishCall(
5623     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5624     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5625     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5626     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5627     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5628 
5629   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5630       Subtarget.isAIXABI())
5631     setUsesTOCBasePtr(DAG);
5632 
5633   unsigned CallOpc =
5634       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5635                     Subtarget, DAG.getTarget());
5636 
5637   if (!CFlags.IsIndirect)
5638     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5639   else if (Subtarget.usesFunctionDescriptors())
5640     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5641                                   dl, CFlags.HasNest, Subtarget);
5642   else
5643     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5644 
5645   // Build the operand list for the call instruction.
5646   SmallVector<SDValue, 8> Ops;
5647   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5648                     SPDiff, Subtarget);
5649 
5650   // Emit tail call.
5651   if (CFlags.IsTailCall) {
5652     // Indirect tail call when using PC Relative calls do not have the same
5653     // constraints.
5654     assert(((Callee.getOpcode() == ISD::Register &&
5655              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5656             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5657             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5658             isa<ConstantSDNode>(Callee) ||
5659             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5660            "Expecting a global address, external symbol, absolute value, "
5661            "register or an indirect tail call when PC Relative calls are "
5662            "used.");
5663     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5664     assert(CallOpc == PPCISD::TC_RETURN &&
5665            "Unexpected call opcode for a tail call.");
5666     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5667     return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5668   }
5669 
5670   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5671   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5672   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5673   Glue = Chain.getValue(1);
5674 
5675   // When performing tail call optimization the callee pops its arguments off
5676   // the stack. Account for this here so these bytes can be pushed back on in
5677   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5678   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5679                          getTargetMachine().Options.GuaranteedTailCallOpt)
5680                             ? NumBytes
5681                             : 0;
5682 
5683   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5684                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5685                              Glue, dl);
5686   Glue = Chain.getValue(1);
5687 
5688   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5689                          DAG, InVals);
5690 }
5691 
5692 SDValue
5693 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5694                              SmallVectorImpl<SDValue> &InVals) const {
5695   SelectionDAG &DAG                     = CLI.DAG;
5696   SDLoc &dl                             = CLI.DL;
5697   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5698   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5699   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5700   SDValue Chain                         = CLI.Chain;
5701   SDValue Callee                        = CLI.Callee;
5702   bool &isTailCall                      = CLI.IsTailCall;
5703   CallingConv::ID CallConv              = CLI.CallConv;
5704   bool isVarArg                         = CLI.IsVarArg;
5705   bool isPatchPoint                     = CLI.IsPatchPoint;
5706   const CallBase *CB                    = CLI.CB;
5707 
5708   if (isTailCall) {
5709     if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5710       isTailCall = false;
5711     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5712       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5713           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5714     else
5715       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5716                                                      Ins, DAG);
5717     if (isTailCall) {
5718       ++NumTailCalls;
5719       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5720         ++NumSiblingCalls;
5721 
5722       // PC Relative calls no longer guarantee that the callee is a Global
5723       // Address Node. The callee could be an indirect tail call in which
5724       // case the SDValue for the callee could be a load (to load the address
5725       // of a function pointer) or it may be a register copy (to move the
5726       // address of the callee from a function parameter into a virtual
5727       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5728       assert((Subtarget.isUsingPCRelativeCalls() ||
5729               isa<GlobalAddressSDNode>(Callee)) &&
5730              "Callee should be an llvm::Function object.");
5731 
5732       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5733                         << "\nTCO callee: ");
5734       LLVM_DEBUG(Callee.dump());
5735     }
5736   }
5737 
5738   if (!isTailCall && CB && CB->isMustTailCall())
5739     report_fatal_error("failed to perform tail call elimination on a call "
5740                        "site marked musttail");
5741 
5742   // When long calls (i.e. indirect calls) are always used, calls are always
5743   // made via function pointer. If we have a function name, first translate it
5744   // into a pointer.
5745   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5746       !isTailCall)
5747     Callee = LowerGlobalAddress(Callee, DAG);
5748 
5749   CallFlags CFlags(
5750       CallConv, isTailCall, isVarArg, isPatchPoint,
5751       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5752       // hasNest
5753       Subtarget.is64BitELFABI() &&
5754           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5755       CLI.NoMerge);
5756 
5757   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5758     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5759                             InVals, CB);
5760 
5761   if (Subtarget.isSVR4ABI())
5762     return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5763                             InVals, CB);
5764 
5765   if (Subtarget.isAIXABI())
5766     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5767                          InVals, CB);
5768 
5769   return LowerCall_Darwin(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5770                           InVals, CB);
5771 }
5772 
5773 SDValue PPCTargetLowering::LowerCall_32SVR4(
5774     SDValue Chain, SDValue Callee, CallFlags CFlags,
5775     const SmallVectorImpl<ISD::OutputArg> &Outs,
5776     const SmallVectorImpl<SDValue> &OutVals,
5777     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5778     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5779     const CallBase *CB) const {
5780   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5781   // of the 32-bit SVR4 ABI stack frame layout.
5782 
5783   const CallingConv::ID CallConv = CFlags.CallConv;
5784   const bool IsVarArg = CFlags.IsVarArg;
5785   const bool IsTailCall = CFlags.IsTailCall;
5786 
5787   assert((CallConv == CallingConv::C ||
5788           CallConv == CallingConv::Cold ||
5789           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5790 
5791   const Align PtrAlign(4);
5792 
5793   MachineFunction &MF = DAG.getMachineFunction();
5794 
5795   // Mark this function as potentially containing a function that contains a
5796   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5797   // and restoring the callers stack pointer in this functions epilog. This is
5798   // done because by tail calling the called function might overwrite the value
5799   // in this function's (MF) stack pointer stack slot 0(SP).
5800   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5801       CallConv == CallingConv::Fast)
5802     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5803 
5804   // Count how many bytes are to be pushed on the stack, including the linkage
5805   // area, parameter list area and the part of the local variable space which
5806   // contains copies of aggregates which are passed by value.
5807 
5808   // Assign locations to all of the outgoing arguments.
5809   SmallVector<CCValAssign, 16> ArgLocs;
5810   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5811 
5812   // Reserve space for the linkage area on the stack.
5813   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5814                        PtrAlign);
5815   if (useSoftFloat())
5816     CCInfo.PreAnalyzeCallOperands(Outs);
5817 
5818   if (IsVarArg) {
5819     // Handle fixed and variable vector arguments differently.
5820     // Fixed vector arguments go into registers as long as registers are
5821     // available. Variable vector arguments always go into memory.
5822     unsigned NumArgs = Outs.size();
5823 
5824     for (unsigned i = 0; i != NumArgs; ++i) {
5825       MVT ArgVT = Outs[i].VT;
5826       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5827       bool Result;
5828 
5829       if (Outs[i].IsFixed) {
5830         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5831                                CCInfo);
5832       } else {
5833         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5834                                       ArgFlags, CCInfo);
5835       }
5836 
5837       if (Result) {
5838 #ifndef NDEBUG
5839         errs() << "Call operand #" << i << " has unhandled type "
5840              << EVT(ArgVT).getEVTString() << "\n";
5841 #endif
5842         llvm_unreachable(nullptr);
5843       }
5844     }
5845   } else {
5846     // All arguments are treated the same.
5847     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5848   }
5849   CCInfo.clearWasPPCF128();
5850 
5851   // Assign locations to all of the outgoing aggregate by value arguments.
5852   SmallVector<CCValAssign, 16> ByValArgLocs;
5853   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5854 
5855   // Reserve stack space for the allocations in CCInfo.
5856   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5857 
5858   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5859 
5860   // Size of the linkage area, parameter list area and the part of the local
5861   // space variable where copies of aggregates which are passed by value are
5862   // stored.
5863   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5864 
5865   // Calculate by how many bytes the stack has to be adjusted in case of tail
5866   // call optimization.
5867   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5868 
5869   // Adjust the stack pointer for the new arguments...
5870   // These operations are automatically eliminated by the prolog/epilog pass
5871   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5872   SDValue CallSeqStart = Chain;
5873 
5874   // Load the return address and frame pointer so it can be moved somewhere else
5875   // later.
5876   SDValue LROp, FPOp;
5877   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5878 
5879   // Set up a copy of the stack pointer for use loading and storing any
5880   // arguments that may not fit in the registers available for argument
5881   // passing.
5882   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5883 
5884   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5885   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5886   SmallVector<SDValue, 8> MemOpChains;
5887 
5888   bool seenFloatArg = false;
5889   // Walk the register/memloc assignments, inserting copies/loads.
5890   // i - Tracks the index into the list of registers allocated for the call
5891   // RealArgIdx - Tracks the index into the list of actual function arguments
5892   // j - Tracks the index into the list of byval arguments
5893   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5894        i != e;
5895        ++i, ++RealArgIdx) {
5896     CCValAssign &VA = ArgLocs[i];
5897     SDValue Arg = OutVals[RealArgIdx];
5898     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5899 
5900     if (Flags.isByVal()) {
5901       // Argument is an aggregate which is passed by value, thus we need to
5902       // create a copy of it in the local variable space of the current stack
5903       // frame (which is the stack frame of the caller) and pass the address of
5904       // this copy to the callee.
5905       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5906       CCValAssign &ByValVA = ByValArgLocs[j++];
5907       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5908 
5909       // Memory reserved in the local variable space of the callers stack frame.
5910       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5911 
5912       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5913       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5914                            StackPtr, PtrOff);
5915 
5916       // Create a copy of the argument in the local area of the current
5917       // stack frame.
5918       SDValue MemcpyCall =
5919         CreateCopyOfByValArgument(Arg, PtrOff,
5920                                   CallSeqStart.getNode()->getOperand(0),
5921                                   Flags, DAG, dl);
5922 
5923       // This must go outside the CALLSEQ_START..END.
5924       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5925                                                      SDLoc(MemcpyCall));
5926       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5927                              NewCallSeqStart.getNode());
5928       Chain = CallSeqStart = NewCallSeqStart;
5929 
5930       // Pass the address of the aggregate copy on the stack either in a
5931       // physical register or in the parameter list area of the current stack
5932       // frame to the callee.
5933       Arg = PtrOff;
5934     }
5935 
5936     // When useCRBits() is true, there can be i1 arguments.
5937     // It is because getRegisterType(MVT::i1) => MVT::i1,
5938     // and for other integer types getRegisterType() => MVT::i32.
5939     // Extend i1 and ensure callee will get i32.
5940     if (Arg.getValueType() == MVT::i1)
5941       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5942                         dl, MVT::i32, Arg);
5943 
5944     if (VA.isRegLoc()) {
5945       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5946       // Put argument in a physical register.
5947       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5948         bool IsLE = Subtarget.isLittleEndian();
5949         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5950                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5951         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5952         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5953                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5954         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5955                              SVal.getValue(0)));
5956       } else
5957         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5958     } else {
5959       // Put argument in the parameter list area of the current stack frame.
5960       assert(VA.isMemLoc());
5961       unsigned LocMemOffset = VA.getLocMemOffset();
5962 
5963       if (!IsTailCall) {
5964         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5965         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5966                              StackPtr, PtrOff);
5967 
5968         MemOpChains.push_back(
5969             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5970       } else {
5971         // Calculate and remember argument location.
5972         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5973                                  TailCallArguments);
5974       }
5975     }
5976   }
5977 
5978   if (!MemOpChains.empty())
5979     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5980 
5981   // Build a sequence of copy-to-reg nodes chained together with token chain
5982   // and flag operands which copy the outgoing args into the appropriate regs.
5983   SDValue InFlag;
5984   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5985     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5986                              RegsToPass[i].second, InFlag);
5987     InFlag = Chain.getValue(1);
5988   }
5989 
5990   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5991   // registers.
5992   if (IsVarArg) {
5993     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5994     SDValue Ops[] = { Chain, InFlag };
5995 
5996     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5997                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5998 
5999     InFlag = Chain.getValue(1);
6000   }
6001 
6002   if (IsTailCall)
6003     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6004                     TailCallArguments);
6005 
6006   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6007                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6008 }
6009 
6010 // Copy an argument into memory, being careful to do this outside the
6011 // call sequence for the call to which the argument belongs.
6012 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6013     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6014     SelectionDAG &DAG, const SDLoc &dl) const {
6015   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6016                         CallSeqStart.getNode()->getOperand(0),
6017                         Flags, DAG, dl);
6018   // The MEMCPY must go outside the CALLSEQ_START..END.
6019   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6020   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6021                                                  SDLoc(MemcpyCall));
6022   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6023                          NewCallSeqStart.getNode());
6024   return NewCallSeqStart;
6025 }
6026 
6027 SDValue PPCTargetLowering::LowerCall_64SVR4(
6028     SDValue Chain, SDValue Callee, CallFlags CFlags,
6029     const SmallVectorImpl<ISD::OutputArg> &Outs,
6030     const SmallVectorImpl<SDValue> &OutVals,
6031     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6032     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6033     const CallBase *CB) const {
6034   bool isELFv2ABI = Subtarget.isELFv2ABI();
6035   bool isLittleEndian = Subtarget.isLittleEndian();
6036   unsigned NumOps = Outs.size();
6037   bool IsSibCall = false;
6038   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6039 
6040   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6041   unsigned PtrByteSize = 8;
6042 
6043   MachineFunction &MF = DAG.getMachineFunction();
6044 
6045   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6046     IsSibCall = true;
6047 
6048   // Mark this function as potentially containing a function that contains a
6049   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6050   // and restoring the callers stack pointer in this functions epilog. This is
6051   // done because by tail calling the called function might overwrite the value
6052   // in this function's (MF) stack pointer stack slot 0(SP).
6053   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6054     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6055 
6056   assert(!(IsFastCall && CFlags.IsVarArg) &&
6057          "fastcc not supported on varargs functions");
6058 
6059   // Count how many bytes are to be pushed on the stack, including the linkage
6060   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
6061   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6062   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6063   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6064   unsigned NumBytes = LinkageSize;
6065   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6066 
6067   static const MCPhysReg GPR[] = {
6068     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6069     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6070   };
6071   static const MCPhysReg VR[] = {
6072     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6073     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6074   };
6075 
6076   const unsigned NumGPRs = array_lengthof(GPR);
6077   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6078   const unsigned NumVRs  = array_lengthof(VR);
6079 
6080   // On ELFv2, we can avoid allocating the parameter area if all the arguments
6081   // can be passed to the callee in registers.
6082   // For the fast calling convention, there is another check below.
6083   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6084   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6085   if (!HasParameterArea) {
6086     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6087     unsigned AvailableFPRs = NumFPRs;
6088     unsigned AvailableVRs = NumVRs;
6089     unsigned NumBytesTmp = NumBytes;
6090     for (unsigned i = 0; i != NumOps; ++i) {
6091       if (Outs[i].Flags.isNest()) continue;
6092       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6093                                  PtrByteSize, LinkageSize, ParamAreaSize,
6094                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
6095         HasParameterArea = true;
6096     }
6097   }
6098 
6099   // When using the fast calling convention, we don't provide backing for
6100   // arguments that will be in registers.
6101   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6102 
6103   // Avoid allocating parameter area for fastcc functions if all the arguments
6104   // can be passed in the registers.
6105   if (IsFastCall)
6106     HasParameterArea = false;
6107 
6108   // Add up all the space actually used.
6109   for (unsigned i = 0; i != NumOps; ++i) {
6110     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6111     EVT ArgVT = Outs[i].VT;
6112     EVT OrigVT = Outs[i].ArgVT;
6113 
6114     if (Flags.isNest())
6115       continue;
6116 
6117     if (IsFastCall) {
6118       if (Flags.isByVal()) {
6119         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6120         if (NumGPRsUsed > NumGPRs)
6121           HasParameterArea = true;
6122       } else {
6123         switch (ArgVT.getSimpleVT().SimpleTy) {
6124         default: llvm_unreachable("Unexpected ValueType for argument!");
6125         case MVT::i1:
6126         case MVT::i32:
6127         case MVT::i64:
6128           if (++NumGPRsUsed <= NumGPRs)
6129             continue;
6130           break;
6131         case MVT::v4i32:
6132         case MVT::v8i16:
6133         case MVT::v16i8:
6134         case MVT::v2f64:
6135         case MVT::v2i64:
6136         case MVT::v1i128:
6137         case MVT::f128:
6138           if (++NumVRsUsed <= NumVRs)
6139             continue;
6140           break;
6141         case MVT::v4f32:
6142           if (++NumVRsUsed <= NumVRs)
6143             continue;
6144           break;
6145         case MVT::f32:
6146         case MVT::f64:
6147           if (++NumFPRsUsed <= NumFPRs)
6148             continue;
6149           break;
6150         }
6151         HasParameterArea = true;
6152       }
6153     }
6154 
6155     /* Respect alignment of argument on the stack.  */
6156     auto Alignement =
6157         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6158     NumBytes = alignTo(NumBytes, Alignement);
6159 
6160     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6161     if (Flags.isInConsecutiveRegsLast())
6162       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6163   }
6164 
6165   unsigned NumBytesActuallyUsed = NumBytes;
6166 
6167   // In the old ELFv1 ABI,
6168   // the prolog code of the callee may store up to 8 GPR argument registers to
6169   // the stack, allowing va_start to index over them in memory if its varargs.
6170   // Because we cannot tell if this is needed on the caller side, we have to
6171   // conservatively assume that it is needed.  As such, make sure we have at
6172   // least enough stack space for the caller to store the 8 GPRs.
6173   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6174   // really requires memory operands, e.g. a vararg function.
6175   if (HasParameterArea)
6176     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6177   else
6178     NumBytes = LinkageSize;
6179 
6180   // Tail call needs the stack to be aligned.
6181   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6182     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6183 
6184   int SPDiff = 0;
6185 
6186   // Calculate by how many bytes the stack has to be adjusted in case of tail
6187   // call optimization.
6188   if (!IsSibCall)
6189     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6190 
6191   // To protect arguments on the stack from being clobbered in a tail call,
6192   // force all the loads to happen before doing any other lowering.
6193   if (CFlags.IsTailCall)
6194     Chain = DAG.getStackArgumentTokenFactor(Chain);
6195 
6196   // Adjust the stack pointer for the new arguments...
6197   // These operations are automatically eliminated by the prolog/epilog pass
6198   if (!IsSibCall)
6199     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6200   SDValue CallSeqStart = Chain;
6201 
6202   // Load the return address and frame pointer so it can be move somewhere else
6203   // later.
6204   SDValue LROp, FPOp;
6205   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6206 
6207   // Set up a copy of the stack pointer for use loading and storing any
6208   // arguments that may not fit in the registers available for argument
6209   // passing.
6210   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6211 
6212   // Figure out which arguments are going to go in registers, and which in
6213   // memory.  Also, if this is a vararg function, floating point operations
6214   // must be stored to our stack, and loaded into integer regs as well, if
6215   // any integer regs are available for argument passing.
6216   unsigned ArgOffset = LinkageSize;
6217 
6218   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6219   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6220 
6221   SmallVector<SDValue, 8> MemOpChains;
6222   for (unsigned i = 0; i != NumOps; ++i) {
6223     SDValue Arg = OutVals[i];
6224     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6225     EVT ArgVT = Outs[i].VT;
6226     EVT OrigVT = Outs[i].ArgVT;
6227 
6228     // PtrOff will be used to store the current argument to the stack if a
6229     // register cannot be found for it.
6230     SDValue PtrOff;
6231 
6232     // We re-align the argument offset for each argument, except when using the
6233     // fast calling convention, when we need to make sure we do that only when
6234     // we'll actually use a stack slot.
6235     auto ComputePtrOff = [&]() {
6236       /* Respect alignment of argument on the stack.  */
6237       auto Alignment =
6238           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6239       ArgOffset = alignTo(ArgOffset, Alignment);
6240 
6241       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6242 
6243       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6244     };
6245 
6246     if (!IsFastCall) {
6247       ComputePtrOff();
6248 
6249       /* Compute GPR index associated with argument offset.  */
6250       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6251       GPR_idx = std::min(GPR_idx, NumGPRs);
6252     }
6253 
6254     // Promote integers to 64-bit values.
6255     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6256       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6257       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6258       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6259     }
6260 
6261     // FIXME memcpy is used way more than necessary.  Correctness first.
6262     // Note: "by value" is code for passing a structure by value, not
6263     // basic types.
6264     if (Flags.isByVal()) {
6265       // Note: Size includes alignment padding, so
6266       //   struct x { short a; char b; }
6267       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6268       // These are the proper values we need for right-justifying the
6269       // aggregate in a parameter register.
6270       unsigned Size = Flags.getByValSize();
6271 
6272       // An empty aggregate parameter takes up no storage and no
6273       // registers.
6274       if (Size == 0)
6275         continue;
6276 
6277       if (IsFastCall)
6278         ComputePtrOff();
6279 
6280       // All aggregates smaller than 8 bytes must be passed right-justified.
6281       if (Size==1 || Size==2 || Size==4) {
6282         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6283         if (GPR_idx != NumGPRs) {
6284           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6285                                         MachinePointerInfo(), VT);
6286           MemOpChains.push_back(Load.getValue(1));
6287           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6288 
6289           ArgOffset += PtrByteSize;
6290           continue;
6291         }
6292       }
6293 
6294       if (GPR_idx == NumGPRs && Size < 8) {
6295         SDValue AddPtr = PtrOff;
6296         if (!isLittleEndian) {
6297           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6298                                           PtrOff.getValueType());
6299           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6300         }
6301         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6302                                                           CallSeqStart,
6303                                                           Flags, DAG, dl);
6304         ArgOffset += PtrByteSize;
6305         continue;
6306       }
6307       // Copy entire object into memory.  There are cases where gcc-generated
6308       // code assumes it is there, even if it could be put entirely into
6309       // registers.  (This is not what the doc says.)
6310 
6311       // FIXME: The above statement is likely due to a misunderstanding of the
6312       // documents.  All arguments must be copied into the parameter area BY
6313       // THE CALLEE in the event that the callee takes the address of any
6314       // formal argument.  That has not yet been implemented.  However, it is
6315       // reasonable to use the stack area as a staging area for the register
6316       // load.
6317 
6318       // Skip this for small aggregates, as we will use the same slot for a
6319       // right-justified copy, below.
6320       if (Size >= 8)
6321         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6322                                                           CallSeqStart,
6323                                                           Flags, DAG, dl);
6324 
6325       // When a register is available, pass a small aggregate right-justified.
6326       if (Size < 8 && GPR_idx != NumGPRs) {
6327         // The easiest way to get this right-justified in a register
6328         // is to copy the structure into the rightmost portion of a
6329         // local variable slot, then load the whole slot into the
6330         // register.
6331         // FIXME: The memcpy seems to produce pretty awful code for
6332         // small aggregates, particularly for packed ones.
6333         // FIXME: It would be preferable to use the slot in the
6334         // parameter save area instead of a new local variable.
6335         SDValue AddPtr = PtrOff;
6336         if (!isLittleEndian) {
6337           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6338           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6339         }
6340         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6341                                                           CallSeqStart,
6342                                                           Flags, DAG, dl);
6343 
6344         // Load the slot into the register.
6345         SDValue Load =
6346             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6347         MemOpChains.push_back(Load.getValue(1));
6348         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6349 
6350         // Done with this argument.
6351         ArgOffset += PtrByteSize;
6352         continue;
6353       }
6354 
6355       // For aggregates larger than PtrByteSize, copy the pieces of the
6356       // object that fit into registers from the parameter save area.
6357       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6358         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6359         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6360         if (GPR_idx != NumGPRs) {
6361           SDValue Load =
6362               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6363           MemOpChains.push_back(Load.getValue(1));
6364           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6365           ArgOffset += PtrByteSize;
6366         } else {
6367           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6368           break;
6369         }
6370       }
6371       continue;
6372     }
6373 
6374     switch (Arg.getSimpleValueType().SimpleTy) {
6375     default: llvm_unreachable("Unexpected ValueType for argument!");
6376     case MVT::i1:
6377     case MVT::i32:
6378     case MVT::i64:
6379       if (Flags.isNest()) {
6380         // The 'nest' parameter, if any, is passed in R11.
6381         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6382         break;
6383       }
6384 
6385       // These can be scalar arguments or elements of an integer array type
6386       // passed directly.  Clang may use those instead of "byval" aggregate
6387       // types to avoid forcing arguments to memory unnecessarily.
6388       if (GPR_idx != NumGPRs) {
6389         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6390       } else {
6391         if (IsFastCall)
6392           ComputePtrOff();
6393 
6394         assert(HasParameterArea &&
6395                "Parameter area must exist to pass an argument in memory.");
6396         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6397                          true, CFlags.IsTailCall, false, MemOpChains,
6398                          TailCallArguments, dl);
6399         if (IsFastCall)
6400           ArgOffset += PtrByteSize;
6401       }
6402       if (!IsFastCall)
6403         ArgOffset += PtrByteSize;
6404       break;
6405     case MVT::f32:
6406     case MVT::f64: {
6407       // These can be scalar arguments or elements of a float array type
6408       // passed directly.  The latter are used to implement ELFv2 homogenous
6409       // float aggregates.
6410 
6411       // Named arguments go into FPRs first, and once they overflow, the
6412       // remaining arguments go into GPRs and then the parameter save area.
6413       // Unnamed arguments for vararg functions always go to GPRs and
6414       // then the parameter save area.  For now, put all arguments to vararg
6415       // routines always in both locations (FPR *and* GPR or stack slot).
6416       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6417       bool NeededLoad = false;
6418 
6419       // First load the argument into the next available FPR.
6420       if (FPR_idx != NumFPRs)
6421         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6422 
6423       // Next, load the argument into GPR or stack slot if needed.
6424       if (!NeedGPROrStack)
6425         ;
6426       else if (GPR_idx != NumGPRs && !IsFastCall) {
6427         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6428         // once we support fp <-> gpr moves.
6429 
6430         // In the non-vararg case, this can only ever happen in the
6431         // presence of f32 array types, since otherwise we never run
6432         // out of FPRs before running out of GPRs.
6433         SDValue ArgVal;
6434 
6435         // Double values are always passed in a single GPR.
6436         if (Arg.getValueType() != MVT::f32) {
6437           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6438 
6439         // Non-array float values are extended and passed in a GPR.
6440         } else if (!Flags.isInConsecutiveRegs()) {
6441           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6442           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6443 
6444         // If we have an array of floats, we collect every odd element
6445         // together with its predecessor into one GPR.
6446         } else if (ArgOffset % PtrByteSize != 0) {
6447           SDValue Lo, Hi;
6448           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6449           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6450           if (!isLittleEndian)
6451             std::swap(Lo, Hi);
6452           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6453 
6454         // The final element, if even, goes into the first half of a GPR.
6455         } else if (Flags.isInConsecutiveRegsLast()) {
6456           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6457           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6458           if (!isLittleEndian)
6459             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6460                                  DAG.getConstant(32, dl, MVT::i32));
6461 
6462         // Non-final even elements are skipped; they will be handled
6463         // together the with subsequent argument on the next go-around.
6464         } else
6465           ArgVal = SDValue();
6466 
6467         if (ArgVal.getNode())
6468           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6469       } else {
6470         if (IsFastCall)
6471           ComputePtrOff();
6472 
6473         // Single-precision floating-point values are mapped to the
6474         // second (rightmost) word of the stack doubleword.
6475         if (Arg.getValueType() == MVT::f32 &&
6476             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6477           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6478           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6479         }
6480 
6481         assert(HasParameterArea &&
6482                "Parameter area must exist to pass an argument in memory.");
6483         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6484                          true, CFlags.IsTailCall, false, MemOpChains,
6485                          TailCallArguments, dl);
6486 
6487         NeededLoad = true;
6488       }
6489       // When passing an array of floats, the array occupies consecutive
6490       // space in the argument area; only round up to the next doubleword
6491       // at the end of the array.  Otherwise, each float takes 8 bytes.
6492       if (!IsFastCall || NeededLoad) {
6493         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6494                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6495         if (Flags.isInConsecutiveRegsLast())
6496           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6497       }
6498       break;
6499     }
6500     case MVT::v4f32:
6501     case MVT::v4i32:
6502     case MVT::v8i16:
6503     case MVT::v16i8:
6504     case MVT::v2f64:
6505     case MVT::v2i64:
6506     case MVT::v1i128:
6507     case MVT::f128:
6508       // These can be scalar arguments or elements of a vector array type
6509       // passed directly.  The latter are used to implement ELFv2 homogenous
6510       // vector aggregates.
6511 
6512       // For a varargs call, named arguments go into VRs or on the stack as
6513       // usual; unnamed arguments always go to the stack or the corresponding
6514       // GPRs when within range.  For now, we always put the value in both
6515       // locations (or even all three).
6516       if (CFlags.IsVarArg) {
6517         assert(HasParameterArea &&
6518                "Parameter area must exist if we have a varargs call.");
6519         // We could elide this store in the case where the object fits
6520         // entirely in R registers.  Maybe later.
6521         SDValue Store =
6522             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6523         MemOpChains.push_back(Store);
6524         if (VR_idx != NumVRs) {
6525           SDValue Load =
6526               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6527           MemOpChains.push_back(Load.getValue(1));
6528           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6529         }
6530         ArgOffset += 16;
6531         for (unsigned i=0; i<16; i+=PtrByteSize) {
6532           if (GPR_idx == NumGPRs)
6533             break;
6534           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6535                                    DAG.getConstant(i, dl, PtrVT));
6536           SDValue Load =
6537               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6538           MemOpChains.push_back(Load.getValue(1));
6539           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6540         }
6541         break;
6542       }
6543 
6544       // Non-varargs Altivec params go into VRs or on the stack.
6545       if (VR_idx != NumVRs) {
6546         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6547       } else {
6548         if (IsFastCall)
6549           ComputePtrOff();
6550 
6551         assert(HasParameterArea &&
6552                "Parameter area must exist to pass an argument in memory.");
6553         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6554                          true, CFlags.IsTailCall, true, MemOpChains,
6555                          TailCallArguments, dl);
6556         if (IsFastCall)
6557           ArgOffset += 16;
6558       }
6559 
6560       if (!IsFastCall)
6561         ArgOffset += 16;
6562       break;
6563     }
6564   }
6565 
6566   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6567          "mismatch in size of parameter area");
6568   (void)NumBytesActuallyUsed;
6569 
6570   if (!MemOpChains.empty())
6571     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6572 
6573   // Check if this is an indirect call (MTCTR/BCTRL).
6574   // See prepareDescriptorIndirectCall and buildCallOperands for more
6575   // information about calls through function pointers in the 64-bit SVR4 ABI.
6576   if (CFlags.IsIndirect) {
6577     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6578     // caller in the TOC save area.
6579     if (isTOCSaveRestoreRequired(Subtarget)) {
6580       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6581       // Load r2 into a virtual register and store it to the TOC save area.
6582       setUsesTOCBasePtr(DAG);
6583       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6584       // TOC save area offset.
6585       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6586       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6587       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6588       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6589                            MachinePointerInfo::getStack(
6590                                DAG.getMachineFunction(), TOCSaveOffset));
6591     }
6592     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6593     // This does not mean the MTCTR instruction must use R12; it's easier
6594     // to model this as an extra parameter, so do that.
6595     if (isELFv2ABI && !CFlags.IsPatchPoint)
6596       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6597   }
6598 
6599   // Build a sequence of copy-to-reg nodes chained together with token chain
6600   // and flag operands which copy the outgoing args into the appropriate regs.
6601   SDValue InFlag;
6602   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6603     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6604                              RegsToPass[i].second, InFlag);
6605     InFlag = Chain.getValue(1);
6606   }
6607 
6608   if (CFlags.IsTailCall && !IsSibCall)
6609     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6610                     TailCallArguments);
6611 
6612   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6613                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6614 }
6615 
6616 SDValue PPCTargetLowering::LowerCall_Darwin(
6617     SDValue Chain, SDValue Callee, CallFlags CFlags,
6618     const SmallVectorImpl<ISD::OutputArg> &Outs,
6619     const SmallVectorImpl<SDValue> &OutVals,
6620     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6621     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6622     const CallBase *CB) const {
6623   unsigned NumOps = Outs.size();
6624 
6625   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6626   bool isPPC64 = PtrVT == MVT::i64;
6627   unsigned PtrByteSize = isPPC64 ? 8 : 4;
6628 
6629   MachineFunction &MF = DAG.getMachineFunction();
6630 
6631   // Mark this function as potentially containing a function that contains a
6632   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6633   // and restoring the callers stack pointer in this functions epilog. This is
6634   // done because by tail calling the called function might overwrite the value
6635   // in this function's (MF) stack pointer stack slot 0(SP).
6636   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6637       CFlags.CallConv == CallingConv::Fast)
6638     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6639 
6640   // Count how many bytes are to be pushed on the stack, including the linkage
6641   // area, and parameter passing area.  We start with 24/48 bytes, which is
6642   // prereserved space for [SP][CR][LR][3 x unused].
6643   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6644   unsigned NumBytes = LinkageSize;
6645 
6646   // Add up all the space actually used.
6647   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6648   // they all go in registers, but we must reserve stack space for them for
6649   // possible use by the caller.  In varargs or 64-bit calls, parameters are
6650   // assigned stack space in order, with padding so Altivec parameters are
6651   // 16-byte aligned.
6652   unsigned nAltivecParamsAtEnd = 0;
6653   for (unsigned i = 0; i != NumOps; ++i) {
6654     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6655     EVT ArgVT = Outs[i].VT;
6656     // Varargs Altivec parameters are padded to a 16 byte boundary.
6657     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
6658         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
6659         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
6660       if (!CFlags.IsVarArg && !isPPC64) {
6661         // Non-varargs Altivec parameters go after all the non-Altivec
6662         // parameters; handle those later so we know how much padding we need.
6663         nAltivecParamsAtEnd++;
6664         continue;
6665       }
6666       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6667       NumBytes = ((NumBytes+15)/16)*16;
6668     }
6669     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6670   }
6671 
6672   // Allow for Altivec parameters at the end, if needed.
6673   if (nAltivecParamsAtEnd) {
6674     NumBytes = ((NumBytes+15)/16)*16;
6675     NumBytes += 16*nAltivecParamsAtEnd;
6676   }
6677 
6678   // The prolog code of the callee may store up to 8 GPR argument registers to
6679   // the stack, allowing va_start to index over them in memory if its varargs.
6680   // Because we cannot tell if this is needed on the caller side, we have to
6681   // conservatively assume that it is needed.  As such, make sure we have at
6682   // least enough stack space for the caller to store the 8 GPRs.
6683   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6684 
6685   // Tail call needs the stack to be aligned.
6686   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6687       CFlags.CallConv == CallingConv::Fast)
6688     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6689 
6690   // Calculate by how many bytes the stack has to be adjusted in case of tail
6691   // call optimization.
6692   int SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6693 
6694   // To protect arguments on the stack from being clobbered in a tail call,
6695   // force all the loads to happen before doing any other lowering.
6696   if (CFlags.IsTailCall)
6697     Chain = DAG.getStackArgumentTokenFactor(Chain);
6698 
6699   // Adjust the stack pointer for the new arguments...
6700   // These operations are automatically eliminated by the prolog/epilog pass
6701   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6702   SDValue CallSeqStart = Chain;
6703 
6704   // Load the return address and frame pointer so it can be move somewhere else
6705   // later.
6706   SDValue LROp, FPOp;
6707   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6708 
6709   // Set up a copy of the stack pointer for use loading and storing any
6710   // arguments that may not fit in the registers available for argument
6711   // passing.
6712   SDValue StackPtr;
6713   if (isPPC64)
6714     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6715   else
6716     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6717 
6718   // Figure out which arguments are going to go in registers, and which in
6719   // memory.  Also, if this is a vararg function, floating point operations
6720   // must be stored to our stack, and loaded into integer regs as well, if
6721   // any integer regs are available for argument passing.
6722   unsigned ArgOffset = LinkageSize;
6723   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6724 
6725   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
6726     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6727     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
6728   };
6729   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
6730     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6731     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6732   };
6733   static const MCPhysReg VR[] = {
6734     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6735     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6736   };
6737   const unsigned NumGPRs = array_lengthof(GPR_32);
6738   const unsigned NumFPRs = 13;
6739   const unsigned NumVRs  = array_lengthof(VR);
6740 
6741   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
6742 
6743   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6744   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6745 
6746   SmallVector<SDValue, 8> MemOpChains;
6747   for (unsigned i = 0; i != NumOps; ++i) {
6748     SDValue Arg = OutVals[i];
6749     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6750 
6751     // PtrOff will be used to store the current argument to the stack if a
6752     // register cannot be found for it.
6753     SDValue PtrOff;
6754 
6755     PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6756 
6757     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6758 
6759     // On PPC64, promote integers to 64-bit values.
6760     if (isPPC64 && Arg.getValueType() == MVT::i32) {
6761       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6762       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6763       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6764     }
6765 
6766     // FIXME memcpy is used way more than necessary.  Correctness first.
6767     // Note: "by value" is code for passing a structure by value, not
6768     // basic types.
6769     if (Flags.isByVal()) {
6770       unsigned Size = Flags.getByValSize();
6771       // Very small objects are passed right-justified.  Everything else is
6772       // passed left-justified.
6773       if (Size==1 || Size==2) {
6774         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
6775         if (GPR_idx != NumGPRs) {
6776           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6777                                         MachinePointerInfo(), VT);
6778           MemOpChains.push_back(Load.getValue(1));
6779           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6780 
6781           ArgOffset += PtrByteSize;
6782         } else {
6783           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6784                                           PtrOff.getValueType());
6785           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6786           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6787                                                             CallSeqStart,
6788                                                             Flags, DAG, dl);
6789           ArgOffset += PtrByteSize;
6790         }
6791         continue;
6792       }
6793       // Copy entire object into memory.  There are cases where gcc-generated
6794       // code assumes it is there, even if it could be put entirely into
6795       // registers.  (This is not what the doc says.)
6796       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6797                                                         CallSeqStart,
6798                                                         Flags, DAG, dl);
6799 
6800       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6801       // copy the pieces of the object that fit into registers from the
6802       // parameter save area.
6803       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6804         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6805         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6806         if (GPR_idx != NumGPRs) {
6807           SDValue Load =
6808               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6809           MemOpChains.push_back(Load.getValue(1));
6810           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6811           ArgOffset += PtrByteSize;
6812         } else {
6813           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6814           break;
6815         }
6816       }
6817       continue;
6818     }
6819 
6820     switch (Arg.getSimpleValueType().SimpleTy) {
6821     default: llvm_unreachable("Unexpected ValueType for argument!");
6822     case MVT::i1:
6823     case MVT::i32:
6824     case MVT::i64:
6825       if (GPR_idx != NumGPRs) {
6826         if (Arg.getValueType() == MVT::i1)
6827           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
6828 
6829         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6830       } else {
6831         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6832                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6833                          TailCallArguments, dl);
6834       }
6835       ArgOffset += PtrByteSize;
6836       break;
6837     case MVT::f32:
6838     case MVT::f64:
6839       if (FPR_idx != NumFPRs) {
6840         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6841 
6842         if (CFlags.IsVarArg) {
6843           SDValue Store =
6844               DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6845           MemOpChains.push_back(Store);
6846 
6847           // Float varargs are always shadowed in available integer registers
6848           if (GPR_idx != NumGPRs) {
6849             SDValue Load =
6850                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6851             MemOpChains.push_back(Load.getValue(1));
6852             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6853           }
6854           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
6855             SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6856             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6857             SDValue Load =
6858                 DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
6859             MemOpChains.push_back(Load.getValue(1));
6860             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6861           }
6862         } else {
6863           // If we have any FPRs remaining, we may also have GPRs remaining.
6864           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6865           // GPRs.
6866           if (GPR_idx != NumGPRs)
6867             ++GPR_idx;
6868           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
6869               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
6870             ++GPR_idx;
6871         }
6872       } else
6873         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6874                          isPPC64, CFlags.IsTailCall, false, MemOpChains,
6875                          TailCallArguments, dl);
6876       if (isPPC64)
6877         ArgOffset += 8;
6878       else
6879         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
6880       break;
6881     case MVT::v4f32:
6882     case MVT::v4i32:
6883     case MVT::v8i16:
6884     case MVT::v16i8:
6885       if (CFlags.IsVarArg) {
6886         // These go aligned on the stack, or in the corresponding R registers
6887         // when within range.  The Darwin PPC ABI doc claims they also go in
6888         // V registers; in fact gcc does this only for arguments that are
6889         // prototyped, not for those that match the ...  We do it for all
6890         // arguments, seems to work.
6891         while (ArgOffset % 16 !=0) {
6892           ArgOffset += PtrByteSize;
6893           if (GPR_idx != NumGPRs)
6894             GPR_idx++;
6895         }
6896         // We could elide this store in the case where the object fits
6897         // entirely in R registers.  Maybe later.
6898         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
6899                              DAG.getConstant(ArgOffset, dl, PtrVT));
6900         SDValue Store =
6901             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6902         MemOpChains.push_back(Store);
6903         if (VR_idx != NumVRs) {
6904           SDValue Load =
6905               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6906           MemOpChains.push_back(Load.getValue(1));
6907           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6908         }
6909         ArgOffset += 16;
6910         for (unsigned i=0; i<16; i+=PtrByteSize) {
6911           if (GPR_idx == NumGPRs)
6912             break;
6913           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6914                                    DAG.getConstant(i, dl, PtrVT));
6915           SDValue Load =
6916               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6917           MemOpChains.push_back(Load.getValue(1));
6918           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6919         }
6920         break;
6921       }
6922 
6923       // Non-varargs Altivec params generally go in registers, but have
6924       // stack space allocated at the end.
6925       if (VR_idx != NumVRs) {
6926         // Doesn't have GPR space allocated.
6927         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6928       } else if (nAltivecParamsAtEnd==0) {
6929         // We are emitting Altivec params in order.
6930         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6931                          isPPC64, CFlags.IsTailCall, true, MemOpChains,
6932                          TailCallArguments, dl);
6933         ArgOffset += 16;
6934       }
6935       break;
6936     }
6937   }
6938   // If all Altivec parameters fit in registers, as they usually do,
6939   // they get stack space following the non-Altivec parameters.  We
6940   // don't track this here because nobody below needs it.
6941   // If there are more Altivec parameters than fit in registers emit
6942   // the stores here.
6943   if (!CFlags.IsVarArg && nAltivecParamsAtEnd > NumVRs) {
6944     unsigned j = 0;
6945     // Offset is aligned; skip 1st 12 params which go in V registers.
6946     ArgOffset = ((ArgOffset+15)/16)*16;
6947     ArgOffset += 12*16;
6948     for (unsigned i = 0; i != NumOps; ++i) {
6949       SDValue Arg = OutVals[i];
6950       EVT ArgType = Outs[i].VT;
6951       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
6952           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
6953         if (++j > NumVRs) {
6954           SDValue PtrOff;
6955           // We are emitting Altivec params in order.
6956           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6957                            isPPC64, CFlags.IsTailCall, true, MemOpChains,
6958                            TailCallArguments, dl);
6959           ArgOffset += 16;
6960         }
6961       }
6962     }
6963   }
6964 
6965   if (!MemOpChains.empty())
6966     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6967 
6968   // On Darwin, R12 must contain the address of an indirect callee.  This does
6969   // not mean the MTCTR instruction must use R12; it's easier to model this as
6970   // an extra parameter, so do that.
6971   if (CFlags.IsIndirect) {
6972     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
6973     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
6974                                                    PPC::R12), Callee));
6975   }
6976 
6977   // Build a sequence of copy-to-reg nodes chained together with token chain
6978   // and flag operands which copy the outgoing args into the appropriate regs.
6979   SDValue InFlag;
6980   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6981     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6982                              RegsToPass[i].second, InFlag);
6983     InFlag = Chain.getValue(1);
6984   }
6985 
6986   if (CFlags.IsTailCall)
6987     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6988                     TailCallArguments);
6989 
6990   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6991                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6992 }
6993 
6994 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6995                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6996                    CCState &State) {
6997 
6998   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6999       State.getMachineFunction().getSubtarget());
7000   const bool IsPPC64 = Subtarget.isPPC64();
7001   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
7002   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
7003 
7004   if (ValVT.isVector() && !State.getMachineFunction()
7005                                .getTarget()
7006                                .Options.EnableAIXExtendedAltivecABI)
7007     report_fatal_error("the default Altivec AIX ABI is not yet supported");
7008 
7009   if (ValVT.isVector() && State.getMachineFunction()
7010                               .getTarget()
7011                               .Options.EnableAIXExtendedAltivecABI)
7012     report_fatal_error("the extended Altivec AIX ABI is not yet supported");
7013 
7014   assert((!ValVT.isInteger() ||
7015           (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
7016          "Integer argument exceeds register size: should have been legalized");
7017 
7018   if (ValVT == MVT::f128)
7019     report_fatal_error("f128 is unimplemented on AIX.");
7020 
7021   if (ArgFlags.isNest())
7022     report_fatal_error("Nest arguments are unimplemented.");
7023 
7024   if (ValVT.isVector() || LocVT.isVector())
7025     report_fatal_error("Vector arguments are unimplemented on AIX.");
7026 
7027   static const MCPhysReg GPR_32[] = {// 32-bit registers.
7028                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7029                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7030   static const MCPhysReg GPR_64[] = {// 64-bit registers.
7031                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7032                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7033 
7034   if (ArgFlags.isByVal()) {
7035     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
7036       report_fatal_error("Pass-by-value arguments with alignment greater than "
7037                          "register width are not supported.");
7038 
7039     const unsigned ByValSize = ArgFlags.getByValSize();
7040 
7041     // An empty aggregate parameter takes up no storage and no registers,
7042     // but needs a MemLoc for a stack slot for the formal arguments side.
7043     if (ByValSize == 0) {
7044       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7045                                        State.getNextStackOffset(), RegVT,
7046                                        LocInfo));
7047       return false;
7048     }
7049 
7050     const unsigned StackSize = alignTo(ByValSize, PtrAlign);
7051     unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
7052     for (const unsigned E = Offset + StackSize; Offset < E;
7053          Offset += PtrAlign.value()) {
7054       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7055         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7056       else {
7057         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
7058                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
7059                                          LocInfo));
7060         break;
7061       }
7062     }
7063     return false;
7064   }
7065 
7066   // Arguments always reserve parameter save area.
7067   switch (ValVT.SimpleTy) {
7068   default:
7069     report_fatal_error("Unhandled value type for argument.");
7070   case MVT::i64:
7071     // i64 arguments should have been split to i32 for PPC32.
7072     assert(IsPPC64 && "PPC32 should have split i64 values.");
7073     LLVM_FALLTHROUGH;
7074   case MVT::i1:
7075   case MVT::i32: {
7076     const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
7077     // AIX integer arguments are always passed in register width.
7078     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
7079       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
7080                                   : CCValAssign::LocInfo::ZExt;
7081     if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
7082       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7083     else
7084       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
7085 
7086     return false;
7087   }
7088   case MVT::f32:
7089   case MVT::f64: {
7090     // Parameter save area (PSA) is reserved even if the float passes in fpr.
7091     const unsigned StoreSize = LocVT.getStoreSize();
7092     // Floats are always 4-byte aligned in the PSA on AIX.
7093     // This includes f64 in 64-bit mode for ABI compatibility.
7094     const unsigned Offset =
7095         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7096     unsigned FReg = State.AllocateReg(FPR);
7097     if (FReg)
7098       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7099 
7100     // Reserve and initialize GPRs or initialize the PSA as required.
7101     for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
7102       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
7103         assert(FReg && "An FPR should be available when a GPR is reserved.");
7104         if (State.isVarArg()) {
7105           // Successfully reserved GPRs are only initialized for vararg calls.
7106           // Custom handling is required for:
7107           //   f64 in PPC32 needs to be split into 2 GPRs.
7108           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7109           State.addLoc(
7110               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7111         }
7112       } else {
7113         // If there are insufficient GPRs, the PSA needs to be initialized.
7114         // Initialization occurs even if an FPR was initialized for
7115         // compatibility with the AIX XL compiler. The full memory for the
7116         // argument will be initialized even if a prior word is saved in GPR.
7117         // A custom memLoc is used when the argument also passes in FPR so
7118         // that the callee handling can skip over it easily.
7119         State.addLoc(
7120             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7121                                              LocInfo)
7122                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7123         break;
7124       }
7125     }
7126 
7127     return false;
7128   }
7129   }
7130   return true;
7131 }
7132 
7133 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7134                                                     bool IsPPC64) {
7135   assert((IsPPC64 || SVT != MVT::i64) &&
7136          "i64 should have been split for 32-bit codegen.");
7137 
7138   switch (SVT) {
7139   default:
7140     report_fatal_error("Unexpected value type for formal argument");
7141   case MVT::i1:
7142   case MVT::i32:
7143   case MVT::i64:
7144     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7145   case MVT::f32:
7146     return &PPC::F4RCRegClass;
7147   case MVT::f64:
7148     return &PPC::F8RCRegClass;
7149   }
7150 }
7151 
7152 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7153                                         SelectionDAG &DAG, SDValue ArgValue,
7154                                         MVT LocVT, const SDLoc &dl) {
7155   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7156   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7157 
7158   if (Flags.isSExt())
7159     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7160                            DAG.getValueType(ValVT));
7161   else if (Flags.isZExt())
7162     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7163                            DAG.getValueType(ValVT));
7164 
7165   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7166 }
7167 
7168 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7169   const unsigned LASize = FL->getLinkageSize();
7170 
7171   if (PPC::GPRCRegClass.contains(Reg)) {
7172     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7173            "Reg must be a valid argument register!");
7174     return LASize + 4 * (Reg - PPC::R3);
7175   }
7176 
7177   if (PPC::G8RCRegClass.contains(Reg)) {
7178     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7179            "Reg must be a valid argument register!");
7180     return LASize + 8 * (Reg - PPC::X3);
7181   }
7182 
7183   llvm_unreachable("Only general purpose registers expected.");
7184 }
7185 
7186 //   AIX ABI Stack Frame Layout:
7187 //
7188 //   Low Memory +--------------------------------------------+
7189 //   SP   +---> | Back chain                                 | ---+
7190 //        |     +--------------------------------------------+    |
7191 //        |     | Saved Condition Register                   |    |
7192 //        |     +--------------------------------------------+    |
7193 //        |     | Saved Linkage Register                     |    |
7194 //        |     +--------------------------------------------+    | Linkage Area
7195 //        |     | Reserved for compilers                     |    |
7196 //        |     +--------------------------------------------+    |
7197 //        |     | Reserved for binders                       |    |
7198 //        |     +--------------------------------------------+    |
7199 //        |     | Saved TOC pointer                          | ---+
7200 //        |     +--------------------------------------------+
7201 //        |     | Parameter save area                        |
7202 //        |     +--------------------------------------------+
7203 //        |     | Alloca space                               |
7204 //        |     +--------------------------------------------+
7205 //        |     | Local variable space                       |
7206 //        |     +--------------------------------------------+
7207 //        |     | Float/int conversion temporary             |
7208 //        |     +--------------------------------------------+
7209 //        |     | Save area for AltiVec registers            |
7210 //        |     +--------------------------------------------+
7211 //        |     | AltiVec alignment padding                  |
7212 //        |     +--------------------------------------------+
7213 //        |     | Save area for VRSAVE register              |
7214 //        |     +--------------------------------------------+
7215 //        |     | Save area for General Purpose registers    |
7216 //        |     +--------------------------------------------+
7217 //        |     | Save area for Floating Point registers     |
7218 //        |     +--------------------------------------------+
7219 //        +---- | Back chain                                 |
7220 // High Memory  +--------------------------------------------+
7221 //
7222 //  Specifications:
7223 //  AIX 7.2 Assembler Language Reference
7224 //  Subroutine linkage convention
7225 
7226 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7227     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7228     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7229     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7230 
7231   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7232           CallConv == CallingConv::Fast) &&
7233          "Unexpected calling convention!");
7234 
7235   if (getTargetMachine().Options.GuaranteedTailCallOpt)
7236     report_fatal_error("Tail call support is unimplemented on AIX.");
7237 
7238   if (useSoftFloat())
7239     report_fatal_error("Soft float support is unimplemented on AIX.");
7240 
7241   const PPCSubtarget &Subtarget =
7242       static_cast<const PPCSubtarget &>(DAG.getSubtarget());
7243 
7244   const bool IsPPC64 = Subtarget.isPPC64();
7245   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7246 
7247   // Assign locations to all of the incoming arguments.
7248   SmallVector<CCValAssign, 16> ArgLocs;
7249   MachineFunction &MF = DAG.getMachineFunction();
7250   MachineFrameInfo &MFI = MF.getFrameInfo();
7251   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7252 
7253   const EVT PtrVT = getPointerTy(MF.getDataLayout());
7254   // Reserve space for the linkage area on the stack.
7255   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7256   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7257   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7258 
7259   SmallVector<SDValue, 8> MemOps;
7260 
7261   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7262     CCValAssign &VA = ArgLocs[I++];
7263     MVT LocVT = VA.getLocVT();
7264     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7265 
7266     // For compatibility with the AIX XL compiler, the float args in the
7267     // parameter save area are initialized even if the argument is available
7268     // in register.  The caller is required to initialize both the register
7269     // and memory, however, the callee can choose to expect it in either.
7270     // The memloc is dismissed here because the argument is retrieved from
7271     // the register.
7272     if (VA.isMemLoc() && VA.needsCustom())
7273       continue;
7274 
7275     if (Flags.isByVal() && VA.isMemLoc()) {
7276       const unsigned Size =
7277           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7278                   PtrByteSize);
7279       const int FI = MF.getFrameInfo().CreateFixedObject(
7280           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7281           /* IsAliased */ true);
7282       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7283       InVals.push_back(FIN);
7284 
7285       continue;
7286     }
7287 
7288     if (Flags.isByVal()) {
7289       assert(VA.isRegLoc() && "MemLocs should already be handled.");
7290 
7291       const MCPhysReg ArgReg = VA.getLocReg();
7292       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7293 
7294       if (Flags.getNonZeroByValAlign() > PtrByteSize)
7295         report_fatal_error("Over aligned byvals not supported yet.");
7296 
7297       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7298       const int FI = MF.getFrameInfo().CreateFixedObject(
7299           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7300           /* IsAliased */ true);
7301       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7302       InVals.push_back(FIN);
7303 
7304       // Add live ins for all the RegLocs for the same ByVal.
7305       const TargetRegisterClass *RegClass =
7306           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7307 
7308       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7309                                                unsigned Offset) {
7310         const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
7311         // Since the callers side has left justified the aggregate in the
7312         // register, we can simply store the entire register into the stack
7313         // slot.
7314         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7315         // The store to the fixedstack object is needed becuase accessing a
7316         // field of the ByVal will use a gep and load. Ideally we will optimize
7317         // to extracting the value from the register directly, and elide the
7318         // stores when the arguments address is not taken, but that will need to
7319         // be future work.
7320         SDValue Store = DAG.getStore(
7321             CopyFrom.getValue(1), dl, CopyFrom,
7322             DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7323             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7324 
7325         MemOps.push_back(Store);
7326       };
7327 
7328       unsigned Offset = 0;
7329       HandleRegLoc(VA.getLocReg(), Offset);
7330       Offset += PtrByteSize;
7331       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7332            Offset += PtrByteSize) {
7333         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7334                "RegLocs should be for ByVal argument.");
7335 
7336         const CCValAssign RL = ArgLocs[I++];
7337         HandleRegLoc(RL.getLocReg(), Offset);
7338       }
7339 
7340       if (Offset != StackSize) {
7341         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7342                "Expected MemLoc for remaining bytes.");
7343         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7344         // Consume the MemLoc.The InVal has already been emitted, so nothing
7345         // more needs to be done.
7346         ++I;
7347       }
7348 
7349       continue;
7350     }
7351 
7352     EVT ValVT = VA.getValVT();
7353     if (VA.isRegLoc() && !VA.needsCustom()) {
7354       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
7355       unsigned VReg =
7356           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
7357       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7358       if (ValVT.isScalarInteger() &&
7359           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7360         ArgValue =
7361             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7362       }
7363       InVals.push_back(ArgValue);
7364       continue;
7365     }
7366     if (VA.isMemLoc()) {
7367       const unsigned LocSize = LocVT.getStoreSize();
7368       const unsigned ValSize = ValVT.getStoreSize();
7369       assert((ValSize <= LocSize) &&
7370              "Object size is larger than size of MemLoc");
7371       int CurArgOffset = VA.getLocMemOffset();
7372       // Objects are right-justified because AIX is big-endian.
7373       if (LocSize > ValSize)
7374         CurArgOffset += LocSize - ValSize;
7375       // Potential tail calls could cause overwriting of argument stack slots.
7376       const bool IsImmutable =
7377           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7378             (CallConv == CallingConv::Fast));
7379       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7380       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7381       SDValue ArgValue =
7382           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7383       InVals.push_back(ArgValue);
7384       continue;
7385     }
7386   }
7387 
7388   // On AIX a minimum of 8 words is saved to the parameter save area.
7389   const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7390   // Area that is at least reserved in the caller of this function.
7391   unsigned CallerReservedArea =
7392       std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
7393 
7394   // Set the size that is at least reserved in caller of this function. Tail
7395   // call optimized function's reserved stack space needs to be aligned so
7396   // that taking the difference between two stack areas will result in an
7397   // aligned stack.
7398   CallerReservedArea =
7399       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7400   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7401   FuncInfo->setMinReservedArea(CallerReservedArea);
7402 
7403   if (isVarArg) {
7404     FuncInfo->setVarArgsFrameIndex(
7405         MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
7406     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7407 
7408     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7409                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7410 
7411     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7412                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7413     const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
7414 
7415     // The fixed integer arguments of a variadic function are stored to the
7416     // VarArgsFrameIndex on the stack so that they may be loaded by
7417     // dereferencing the result of va_next.
7418     for (unsigned GPRIndex =
7419              (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
7420          GPRIndex < NumGPArgRegs; ++GPRIndex) {
7421 
7422       const unsigned VReg =
7423           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7424                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7425 
7426       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7427       SDValue Store =
7428           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7429       MemOps.push_back(Store);
7430       // Increment the address for the next argument to store.
7431       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7432       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7433     }
7434   }
7435 
7436   if (!MemOps.empty())
7437     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7438 
7439   return Chain;
7440 }
7441 
7442 SDValue PPCTargetLowering::LowerCall_AIX(
7443     SDValue Chain, SDValue Callee, CallFlags CFlags,
7444     const SmallVectorImpl<ISD::OutputArg> &Outs,
7445     const SmallVectorImpl<SDValue> &OutVals,
7446     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7447     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7448     const CallBase *CB) const {
7449   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7450   // AIX ABI stack frame layout.
7451 
7452   assert((CFlags.CallConv == CallingConv::C ||
7453           CFlags.CallConv == CallingConv::Cold ||
7454           CFlags.CallConv == CallingConv::Fast) &&
7455          "Unexpected calling convention!");
7456 
7457   if (CFlags.IsPatchPoint)
7458     report_fatal_error("This call type is unimplemented on AIX.");
7459 
7460   const PPCSubtarget& Subtarget =
7461       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7462   if (Subtarget.hasAltivec())
7463     report_fatal_error("Altivec support is unimplemented on AIX.");
7464 
7465   MachineFunction &MF = DAG.getMachineFunction();
7466   SmallVector<CCValAssign, 16> ArgLocs;
7467   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7468                  *DAG.getContext());
7469 
7470   // Reserve space for the linkage save area (LSA) on the stack.
7471   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7472   //   [SP][CR][LR][2 x reserved][TOC].
7473   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7474   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7475   const bool IsPPC64 = Subtarget.isPPC64();
7476   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7477   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7478   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7479   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7480 
7481   // The prolog code of the callee may store up to 8 GPR argument registers to
7482   // the stack, allowing va_start to index over them in memory if the callee
7483   // is variadic.
7484   // Because we cannot tell if this is needed on the caller side, we have to
7485   // conservatively assume that it is needed.  As such, make sure we have at
7486   // least enough stack space for the caller to store the 8 GPRs.
7487   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7488   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7489                                      CCInfo.getNextStackOffset());
7490 
7491   // Adjust the stack pointer for the new arguments...
7492   // These operations are automatically eliminated by the prolog/epilog pass.
7493   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7494   SDValue CallSeqStart = Chain;
7495 
7496   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7497   SmallVector<SDValue, 8> MemOpChains;
7498 
7499   // Set up a copy of the stack pointer for loading and storing any
7500   // arguments that may not fit in the registers available for argument
7501   // passing.
7502   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7503                                    : DAG.getRegister(PPC::R1, MVT::i32);
7504 
7505   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7506     const unsigned ValNo = ArgLocs[I].getValNo();
7507     SDValue Arg = OutVals[ValNo];
7508     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7509 
7510     if (Flags.isByVal()) {
7511       const unsigned ByValSize = Flags.getByValSize();
7512 
7513       // Nothing to do for zero-sized ByVals on the caller side.
7514       if (!ByValSize) {
7515         ++I;
7516         continue;
7517       }
7518 
7519       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7520         return DAG.getExtLoad(
7521             ISD::ZEXTLOAD, dl, PtrVT, Chain,
7522             (LoadOffset != 0)
7523                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7524                 : Arg,
7525             MachinePointerInfo(), VT);
7526       };
7527 
7528       unsigned LoadOffset = 0;
7529 
7530       // Initialize registers, which are fully occupied by the by-val argument.
7531       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7532         SDValue Load = GetLoad(PtrVT, LoadOffset);
7533         MemOpChains.push_back(Load.getValue(1));
7534         LoadOffset += PtrByteSize;
7535         const CCValAssign &ByValVA = ArgLocs[I++];
7536         assert(ByValVA.getValNo() == ValNo &&
7537                "Unexpected location for pass-by-value argument.");
7538         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7539       }
7540 
7541       if (LoadOffset == ByValSize)
7542         continue;
7543 
7544       // There must be one more loc to handle the remainder.
7545       assert(ArgLocs[I].getValNo() == ValNo &&
7546              "Expected additional location for by-value argument.");
7547 
7548       if (ArgLocs[I].isMemLoc()) {
7549         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7550         const CCValAssign &ByValVA = ArgLocs[I++];
7551         ISD::ArgFlagsTy MemcpyFlags = Flags;
7552         // Only memcpy the bytes that don't pass in register.
7553         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7554         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7555             (LoadOffset != 0)
7556                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7557                 : Arg,
7558             DAG.getObjectPtrOffset(dl, StackPtr,
7559                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
7560             CallSeqStart, MemcpyFlags, DAG, dl);
7561         continue;
7562       }
7563 
7564       // Initialize the final register residue.
7565       // Any residue that occupies the final by-val arg register must be
7566       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7567       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7568       // 2 and 1 byte loads.
7569       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7570       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7571              "Unexpected register residue for by-value argument.");
7572       SDValue ResidueVal;
7573       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7574         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7575         const MVT VT =
7576             N == 1 ? MVT::i8
7577                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7578         SDValue Load = GetLoad(VT, LoadOffset);
7579         MemOpChains.push_back(Load.getValue(1));
7580         LoadOffset += N;
7581         Bytes += N;
7582 
7583         // By-val arguments are passed left-justfied in register.
7584         // Every load here needs to be shifted, otherwise a full register load
7585         // should have been used.
7586         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7587                "Unexpected load emitted during handling of pass-by-value "
7588                "argument.");
7589         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7590         EVT ShiftAmountTy =
7591             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7592         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7593         SDValue ShiftedLoad =
7594             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7595         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7596                                               ShiftedLoad)
7597                                 : ShiftedLoad;
7598       }
7599 
7600       const CCValAssign &ByValVA = ArgLocs[I++];
7601       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7602       continue;
7603     }
7604 
7605     CCValAssign &VA = ArgLocs[I++];
7606     const MVT LocVT = VA.getLocVT();
7607     const MVT ValVT = VA.getValVT();
7608 
7609     switch (VA.getLocInfo()) {
7610     default:
7611       report_fatal_error("Unexpected argument extension type.");
7612     case CCValAssign::Full:
7613       break;
7614     case CCValAssign::ZExt:
7615       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7616       break;
7617     case CCValAssign::SExt:
7618       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7619       break;
7620     }
7621 
7622     if (VA.isRegLoc() && !VA.needsCustom()) {
7623       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7624       continue;
7625     }
7626 
7627     if (VA.isMemLoc()) {
7628       SDValue PtrOff =
7629           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7630       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7631       MemOpChains.push_back(
7632           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7633 
7634       continue;
7635     }
7636 
7637     // Custom handling is used for GPR initializations for vararg float
7638     // arguments.
7639     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7640            ValVT.isFloatingPoint() && LocVT.isInteger() &&
7641            "Unexpected register handling for calling convention.");
7642 
7643     SDValue ArgAsInt =
7644         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7645 
7646     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7647       // f32 in 32-bit GPR
7648       // f64 in 64-bit GPR
7649       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7650     else if (Arg.getValueType().getFixedSizeInBits() <
7651              LocVT.getFixedSizeInBits())
7652       // f32 in 64-bit GPR.
7653       RegsToPass.push_back(std::make_pair(
7654           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7655     else {
7656       // f64 in two 32-bit GPRs
7657       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7658       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7659              "Unexpected custom register for argument!");
7660       CCValAssign &GPR1 = VA;
7661       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7662                                      DAG.getConstant(32, dl, MVT::i8));
7663       RegsToPass.push_back(std::make_pair(
7664           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7665 
7666       if (I != E) {
7667         // If only 1 GPR was available, there will only be one custom GPR and
7668         // the argument will also pass in memory.
7669         CCValAssign &PeekArg = ArgLocs[I];
7670         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7671           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7672           CCValAssign &GPR2 = ArgLocs[I++];
7673           RegsToPass.push_back(std::make_pair(
7674               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7675         }
7676       }
7677     }
7678   }
7679 
7680   if (!MemOpChains.empty())
7681     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7682 
7683   // For indirect calls, we need to save the TOC base to the stack for
7684   // restoration after the call.
7685   if (CFlags.IsIndirect) {
7686     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7687     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7688     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7689     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7690     const unsigned TOCSaveOffset =
7691         Subtarget.getFrameLowering()->getTOCSaveOffset();
7692 
7693     setUsesTOCBasePtr(DAG);
7694     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7695     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7696     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7697     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7698     Chain = DAG.getStore(
7699         Val.getValue(1), dl, Val, AddPtr,
7700         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7701   }
7702 
7703   // Build a sequence of copy-to-reg nodes chained together with token chain
7704   // and flag operands which copy the outgoing args into the appropriate regs.
7705   SDValue InFlag;
7706   for (auto Reg : RegsToPass) {
7707     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7708     InFlag = Chain.getValue(1);
7709   }
7710 
7711   const int SPDiff = 0;
7712   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7713                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7714 }
7715 
7716 bool
7717 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7718                                   MachineFunction &MF, bool isVarArg,
7719                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7720                                   LLVMContext &Context) const {
7721   SmallVector<CCValAssign, 16> RVLocs;
7722   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7723   return CCInfo.CheckReturn(
7724       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7725                 ? RetCC_PPC_Cold
7726                 : RetCC_PPC);
7727 }
7728 
7729 SDValue
7730 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7731                                bool isVarArg,
7732                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7733                                const SmallVectorImpl<SDValue> &OutVals,
7734                                const SDLoc &dl, SelectionDAG &DAG) const {
7735   SmallVector<CCValAssign, 16> RVLocs;
7736   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7737                  *DAG.getContext());
7738   CCInfo.AnalyzeReturn(Outs,
7739                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7740                            ? RetCC_PPC_Cold
7741                            : RetCC_PPC);
7742 
7743   SDValue Flag;
7744   SmallVector<SDValue, 4> RetOps(1, Chain);
7745 
7746   // Copy the result values into the output registers.
7747   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7748     CCValAssign &VA = RVLocs[i];
7749     assert(VA.isRegLoc() && "Can only return in registers!");
7750 
7751     SDValue Arg = OutVals[RealResIdx];
7752 
7753     if (Subtarget.isAIXABI() &&
7754         (VA.getLocVT().isVector() || VA.getValVT().isVector()))
7755       report_fatal_error("Returning vector types not yet supported on AIX.");
7756 
7757     switch (VA.getLocInfo()) {
7758     default: llvm_unreachable("Unknown loc info!");
7759     case CCValAssign::Full: break;
7760     case CCValAssign::AExt:
7761       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7762       break;
7763     case CCValAssign::ZExt:
7764       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7765       break;
7766     case CCValAssign::SExt:
7767       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7768       break;
7769     }
7770     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7771       bool isLittleEndian = Subtarget.isLittleEndian();
7772       // Legalize ret f64 -> ret 2 x i32.
7773       SDValue SVal =
7774           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7775                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7776       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7777       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7778       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7779                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7780       Flag = Chain.getValue(1);
7781       VA = RVLocs[++i]; // skip ahead to next loc
7782       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7783     } else
7784       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7785     Flag = Chain.getValue(1);
7786     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7787   }
7788 
7789   RetOps[0] = Chain;  // Update chain.
7790 
7791   // Add the flag if we have it.
7792   if (Flag.getNode())
7793     RetOps.push_back(Flag);
7794 
7795   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7796 }
7797 
7798 SDValue
7799 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7800                                                 SelectionDAG &DAG) const {
7801   SDLoc dl(Op);
7802 
7803   // Get the correct type for integers.
7804   EVT IntVT = Op.getValueType();
7805 
7806   // Get the inputs.
7807   SDValue Chain = Op.getOperand(0);
7808   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7809   // Build a DYNAREAOFFSET node.
7810   SDValue Ops[2] = {Chain, FPSIdx};
7811   SDVTList VTs = DAG.getVTList(IntVT);
7812   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7813 }
7814 
7815 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7816                                              SelectionDAG &DAG) const {
7817   // When we pop the dynamic allocation we need to restore the SP link.
7818   SDLoc dl(Op);
7819 
7820   // Get the correct type for pointers.
7821   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7822 
7823   // Construct the stack pointer operand.
7824   bool isPPC64 = Subtarget.isPPC64();
7825   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7826   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7827 
7828   // Get the operands for the STACKRESTORE.
7829   SDValue Chain = Op.getOperand(0);
7830   SDValue SaveSP = Op.getOperand(1);
7831 
7832   // Load the old link SP.
7833   SDValue LoadLinkSP =
7834       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7835 
7836   // Restore the stack pointer.
7837   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7838 
7839   // Store the old link SP.
7840   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7841 }
7842 
7843 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7844   MachineFunction &MF = DAG.getMachineFunction();
7845   bool isPPC64 = Subtarget.isPPC64();
7846   EVT PtrVT = getPointerTy(MF.getDataLayout());
7847 
7848   // Get current frame pointer save index.  The users of this index will be
7849   // primarily DYNALLOC instructions.
7850   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7851   int RASI = FI->getReturnAddrSaveIndex();
7852 
7853   // If the frame pointer save index hasn't been defined yet.
7854   if (!RASI) {
7855     // Find out what the fix offset of the frame pointer save area.
7856     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7857     // Allocate the frame index for frame pointer save area.
7858     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7859     // Save the result.
7860     FI->setReturnAddrSaveIndex(RASI);
7861   }
7862   return DAG.getFrameIndex(RASI, PtrVT);
7863 }
7864 
7865 SDValue
7866 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7867   MachineFunction &MF = DAG.getMachineFunction();
7868   bool isPPC64 = Subtarget.isPPC64();
7869   EVT PtrVT = getPointerTy(MF.getDataLayout());
7870 
7871   // Get current frame pointer save index.  The users of this index will be
7872   // primarily DYNALLOC instructions.
7873   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7874   int FPSI = FI->getFramePointerSaveIndex();
7875 
7876   // If the frame pointer save index hasn't been defined yet.
7877   if (!FPSI) {
7878     // Find out what the fix offset of the frame pointer save area.
7879     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7880     // Allocate the frame index for frame pointer save area.
7881     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7882     // Save the result.
7883     FI->setFramePointerSaveIndex(FPSI);
7884   }
7885   return DAG.getFrameIndex(FPSI, PtrVT);
7886 }
7887 
7888 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7889                                                    SelectionDAG &DAG) const {
7890   MachineFunction &MF = DAG.getMachineFunction();
7891   // Get the inputs.
7892   SDValue Chain = Op.getOperand(0);
7893   SDValue Size  = Op.getOperand(1);
7894   SDLoc dl(Op);
7895 
7896   // Get the correct type for pointers.
7897   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7898   // Negate the size.
7899   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7900                                 DAG.getConstant(0, dl, PtrVT), Size);
7901   // Construct a node for the frame pointer save index.
7902   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7903   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7904   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7905   if (hasInlineStackProbe(MF))
7906     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7907   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7908 }
7909 
7910 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7911                                                      SelectionDAG &DAG) const {
7912   MachineFunction &MF = DAG.getMachineFunction();
7913 
7914   bool isPPC64 = Subtarget.isPPC64();
7915   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7916 
7917   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7918   return DAG.getFrameIndex(FI, PtrVT);
7919 }
7920 
7921 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7922                                                SelectionDAG &DAG) const {
7923   SDLoc DL(Op);
7924   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7925                      DAG.getVTList(MVT::i32, MVT::Other),
7926                      Op.getOperand(0), Op.getOperand(1));
7927 }
7928 
7929 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7930                                                 SelectionDAG &DAG) const {
7931   SDLoc DL(Op);
7932   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7933                      Op.getOperand(0), Op.getOperand(1));
7934 }
7935 
7936 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7937   if (Op.getValueType().isVector())
7938     return LowerVectorLoad(Op, DAG);
7939 
7940   assert(Op.getValueType() == MVT::i1 &&
7941          "Custom lowering only for i1 loads");
7942 
7943   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7944 
7945   SDLoc dl(Op);
7946   LoadSDNode *LD = cast<LoadSDNode>(Op);
7947 
7948   SDValue Chain = LD->getChain();
7949   SDValue BasePtr = LD->getBasePtr();
7950   MachineMemOperand *MMO = LD->getMemOperand();
7951 
7952   SDValue NewLD =
7953       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7954                      BasePtr, MVT::i8, MMO);
7955   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7956 
7957   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7958   return DAG.getMergeValues(Ops, dl);
7959 }
7960 
7961 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7962   if (Op.getOperand(1).getValueType().isVector())
7963     return LowerVectorStore(Op, DAG);
7964 
7965   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7966          "Custom lowering only for i1 stores");
7967 
7968   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7969 
7970   SDLoc dl(Op);
7971   StoreSDNode *ST = cast<StoreSDNode>(Op);
7972 
7973   SDValue Chain = ST->getChain();
7974   SDValue BasePtr = ST->getBasePtr();
7975   SDValue Value = ST->getValue();
7976   MachineMemOperand *MMO = ST->getMemOperand();
7977 
7978   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7979                       Value);
7980   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7981 }
7982 
7983 // FIXME: Remove this once the ANDI glue bug is fixed:
7984 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7985   assert(Op.getValueType() == MVT::i1 &&
7986          "Custom lowering only for i1 results");
7987 
7988   SDLoc DL(Op);
7989   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7990 }
7991 
7992 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7993                                                SelectionDAG &DAG) const {
7994 
7995   // Implements a vector truncate that fits in a vector register as a shuffle.
7996   // We want to legalize vector truncates down to where the source fits in
7997   // a vector register (and target is therefore smaller than vector register
7998   // size).  At that point legalization will try to custom lower the sub-legal
7999   // result and get here - where we can contain the truncate as a single target
8000   // operation.
8001 
8002   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8003   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8004   //
8005   // We will implement it for big-endian ordering as this (where x denotes
8006   // undefined):
8007   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8008   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8009   //
8010   // The same operation in little-endian ordering will be:
8011   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8012   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8013 
8014   EVT TrgVT = Op.getValueType();
8015   assert(TrgVT.isVector() && "Vector type expected.");
8016   unsigned TrgNumElts = TrgVT.getVectorNumElements();
8017   EVT EltVT = TrgVT.getVectorElementType();
8018   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8019       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8020       !isPowerOf2_32(EltVT.getSizeInBits()))
8021     return SDValue();
8022 
8023   SDValue N1 = Op.getOperand(0);
8024   EVT SrcVT = N1.getValueType();
8025   unsigned SrcSize = SrcVT.getSizeInBits();
8026   if (SrcSize > 256 ||
8027       !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8028       !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
8029     return SDValue();
8030   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8031     return SDValue();
8032 
8033   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8034   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8035 
8036   SDLoc DL(Op);
8037   SDValue Op1, Op2;
8038   if (SrcSize == 256) {
8039     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8040     EVT SplitVT =
8041         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8042     unsigned SplitNumElts = SplitVT.getVectorNumElements();
8043     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8044                       DAG.getConstant(0, DL, VecIdxTy));
8045     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8046                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8047   }
8048   else {
8049     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8050     Op2 = DAG.getUNDEF(WideVT);
8051   }
8052 
8053   // First list the elements we want to keep.
8054   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8055   SmallVector<int, 16> ShuffV;
8056   if (Subtarget.isLittleEndian())
8057     for (unsigned i = 0; i < TrgNumElts; ++i)
8058       ShuffV.push_back(i * SizeMult);
8059   else
8060     for (unsigned i = 1; i <= TrgNumElts; ++i)
8061       ShuffV.push_back(i * SizeMult - 1);
8062 
8063   // Populate the remaining elements with undefs.
8064   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8065     // ShuffV.push_back(i + WideNumElts);
8066     ShuffV.push_back(WideNumElts + 1);
8067 
8068   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8069   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8070   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8071 }
8072 
8073 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8074 /// possible.
8075 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8076   // Not FP, or using SPE? Not a fsel.
8077   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
8078       !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
8079     return Op;
8080 
8081   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8082 
8083   EVT ResVT = Op.getValueType();
8084   EVT CmpVT = Op.getOperand(0).getValueType();
8085   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8086   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
8087   SDLoc dl(Op);
8088   SDNodeFlags Flags = Op.getNode()->getFlags();
8089 
8090   // We have xsmaxcdp/xsmincdp which are OK to emit even in the
8091   // presence of infinities.
8092   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8093     switch (CC) {
8094     default:
8095       break;
8096     case ISD::SETOGT:
8097     case ISD::SETGT:
8098       return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
8099     case ISD::SETOLT:
8100     case ISD::SETLT:
8101       return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
8102     }
8103   }
8104 
8105   // We might be able to do better than this under some circumstances, but in
8106   // general, fsel-based lowering of select is a finite-math-only optimization.
8107   // For more information, see section F.3 of the 2.06 ISA specification.
8108   // With ISA 3.0
8109   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8110       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8111     return Op;
8112 
8113   // If the RHS of the comparison is a 0.0, we don't need to do the
8114   // subtraction at all.
8115   SDValue Sel1;
8116   if (isFloatingPointZero(RHS))
8117     switch (CC) {
8118     default: break;       // SETUO etc aren't handled by fsel.
8119     case ISD::SETNE:
8120       std::swap(TV, FV);
8121       LLVM_FALLTHROUGH;
8122     case ISD::SETEQ:
8123       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8124         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8125       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8126       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8127         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8128       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8129                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8130     case ISD::SETULT:
8131     case ISD::SETLT:
8132       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8133       LLVM_FALLTHROUGH;
8134     case ISD::SETOGE:
8135     case ISD::SETGE:
8136       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8137         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8138       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8139     case ISD::SETUGT:
8140     case ISD::SETGT:
8141       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8142       LLVM_FALLTHROUGH;
8143     case ISD::SETOLE:
8144     case ISD::SETLE:
8145       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8146         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8147       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8148                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8149     }
8150 
8151   SDValue Cmp;
8152   switch (CC) {
8153   default: break;       // SETUO etc aren't handled by fsel.
8154   case ISD::SETNE:
8155     std::swap(TV, FV);
8156     LLVM_FALLTHROUGH;
8157   case ISD::SETEQ:
8158     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8159     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8160       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8161     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8162     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8163       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8164     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8165                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8166   case ISD::SETULT:
8167   case ISD::SETLT:
8168     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8169     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8170       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8171     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8172   case ISD::SETOGE:
8173   case ISD::SETGE:
8174     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8175     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8176       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8177     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8178   case ISD::SETUGT:
8179   case ISD::SETGT:
8180     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8181     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8182       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8183     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8184   case ISD::SETOLE:
8185   case ISD::SETLE:
8186     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8187     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8188       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8189     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8190   }
8191   return Op;
8192 }
8193 
8194 static unsigned getPPCStrictOpcode(unsigned Opc) {
8195   switch (Opc) {
8196   default:
8197     llvm_unreachable("No strict version of this opcode!");
8198   case PPCISD::FCTIDZ:
8199     return PPCISD::STRICT_FCTIDZ;
8200   case PPCISD::FCTIWZ:
8201     return PPCISD::STRICT_FCTIWZ;
8202   case PPCISD::FCTIDUZ:
8203     return PPCISD::STRICT_FCTIDUZ;
8204   case PPCISD::FCTIWUZ:
8205     return PPCISD::STRICT_FCTIWUZ;
8206   case PPCISD::FCFID:
8207     return PPCISD::STRICT_FCFID;
8208   case PPCISD::FCFIDU:
8209     return PPCISD::STRICT_FCFIDU;
8210   case PPCISD::FCFIDS:
8211     return PPCISD::STRICT_FCFIDS;
8212   case PPCISD::FCFIDUS:
8213     return PPCISD::STRICT_FCFIDUS;
8214   }
8215 }
8216 
8217 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8218                               const PPCSubtarget &Subtarget) {
8219   SDLoc dl(Op);
8220   bool IsStrict = Op->isStrictFPOpcode();
8221   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8222                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8223 
8224   // TODO: Any other flags to propagate?
8225   SDNodeFlags Flags;
8226   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8227 
8228   // For strict nodes, source is the second operand.
8229   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8230   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8231   assert(Src.getValueType().isFloatingPoint());
8232   if (Src.getValueType() == MVT::f32) {
8233     if (IsStrict) {
8234       Src =
8235           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8236                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8237       Chain = Src.getValue(1);
8238     } else
8239       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8240   }
8241   SDValue Conv;
8242   unsigned Opc = ISD::DELETED_NODE;
8243   switch (Op.getSimpleValueType().SimpleTy) {
8244   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8245   case MVT::i32:
8246     Opc = IsSigned ? PPCISD::FCTIWZ
8247                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8248     break;
8249   case MVT::i64:
8250     assert((IsSigned || Subtarget.hasFPCVT()) &&
8251            "i64 FP_TO_UINT is supported only with FPCVT");
8252     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8253   }
8254   if (IsStrict) {
8255     Opc = getPPCStrictOpcode(Opc);
8256     Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8257                        {Chain, Src}, Flags);
8258   } else {
8259     Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8260   }
8261   return Conv;
8262 }
8263 
8264 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8265                                                SelectionDAG &DAG,
8266                                                const SDLoc &dl) const {
8267   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8268   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8269                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8270   bool IsStrict = Op->isStrictFPOpcode();
8271 
8272   // Convert the FP value to an int value through memory.
8273   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8274                   (IsSigned || Subtarget.hasFPCVT());
8275   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8276   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8277   MachinePointerInfo MPI =
8278       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8279 
8280   // Emit a store to the stack slot.
8281   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8282   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8283   if (i32Stack) {
8284     MachineFunction &MF = DAG.getMachineFunction();
8285     Alignment = Align(4);
8286     MachineMemOperand *MMO =
8287         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8288     SDValue Ops[] = { Chain, Tmp, FIPtr };
8289     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8290               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8291   } else
8292     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8293 
8294   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
8295   // add in a bias on big endian.
8296   if (Op.getValueType() == MVT::i32 && !i32Stack) {
8297     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8298                         DAG.getConstant(4, dl, FIPtr.getValueType()));
8299     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8300   }
8301 
8302   RLI.Chain = Chain;
8303   RLI.Ptr = FIPtr;
8304   RLI.MPI = MPI;
8305   RLI.Alignment = Alignment;
8306 }
8307 
8308 /// Custom lowers floating point to integer conversions to use
8309 /// the direct move instructions available in ISA 2.07 to avoid the
8310 /// need for load/store combinations.
8311 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8312                                                     SelectionDAG &DAG,
8313                                                     const SDLoc &dl) const {
8314   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8315   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8316   if (Op->isStrictFPOpcode())
8317     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8318   else
8319     return Mov;
8320 }
8321 
8322 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8323                                           const SDLoc &dl) const {
8324   bool IsStrict = Op->isStrictFPOpcode();
8325   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8326                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8327   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8328   EVT SrcVT = Src.getValueType();
8329   EVT DstVT = Op.getValueType();
8330 
8331   // FP to INT conversions are legal for f128.
8332   if (SrcVT == MVT::f128)
8333     return Op;
8334 
8335   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8336   // PPC (the libcall is not available).
8337   if (SrcVT == MVT::ppcf128) {
8338     if (DstVT == MVT::i32) {
8339       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8340       // set other fast-math flags to FP operations in both strict and
8341       // non-strict cases. (FP_TO_SINT, FSUB)
8342       SDNodeFlags Flags;
8343       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8344 
8345       if (IsSigned) {
8346         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8347                                  DAG.getIntPtrConstant(0, dl));
8348         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
8349                                  DAG.getIntPtrConstant(1, dl));
8350 
8351         // Add the two halves of the long double in round-to-zero mode, and use
8352         // a smaller FP_TO_SINT.
8353         if (IsStrict) {
8354           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8355                                     DAG.getVTList(MVT::f64, MVT::Other),
8356                                     {Op.getOperand(0), Lo, Hi}, Flags);
8357           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8358                              DAG.getVTList(MVT::i32, MVT::Other),
8359                              {Res.getValue(1), Res}, Flags);
8360         } else {
8361           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8362           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8363         }
8364       } else {
8365         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8366         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8367         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8368         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8369         if (IsStrict) {
8370           // Sel = Src < 0x80000000
8371           // FltOfs = select Sel, 0.0, 0x80000000
8372           // IntOfs = select Sel, 0, 0x80000000
8373           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8374           SDValue Chain = Op.getOperand(0);
8375           EVT SetCCVT =
8376               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8377           EVT DstSetCCVT =
8378               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8379           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8380                                      Chain, true);
8381           Chain = Sel.getValue(1);
8382 
8383           SDValue FltOfs = DAG.getSelect(
8384               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8385           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8386 
8387           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8388                                     DAG.getVTList(SrcVT, MVT::Other),
8389                                     {Chain, Src, FltOfs}, Flags);
8390           Chain = Val.getValue(1);
8391           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8392                                      DAG.getVTList(DstVT, MVT::Other),
8393                                      {Chain, Val}, Flags);
8394           Chain = SInt.getValue(1);
8395           SDValue IntOfs = DAG.getSelect(
8396               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8397           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8398           return DAG.getMergeValues({Result, Chain}, dl);
8399         } else {
8400           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8401           // FIXME: generated code sucks.
8402           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8403           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8404           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8405           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8406           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8407         }
8408       }
8409     }
8410 
8411     return SDValue();
8412   }
8413 
8414   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8415     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8416 
8417   ReuseLoadInfo RLI;
8418   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8419 
8420   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8421                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8422 }
8423 
8424 // We're trying to insert a regular store, S, and then a load, L. If the
8425 // incoming value, O, is a load, we might just be able to have our load use the
8426 // address used by O. However, we don't know if anything else will store to
8427 // that address before we can load from it. To prevent this situation, we need
8428 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8429 // the same chain operand as O, we create a token factor from the chain results
8430 // of O and L, and we replace all uses of O's chain result with that token
8431 // factor (see spliceIntoChain below for this last part).
8432 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8433                                             ReuseLoadInfo &RLI,
8434                                             SelectionDAG &DAG,
8435                                             ISD::LoadExtType ET) const {
8436   // Conservatively skip reusing for constrained FP nodes.
8437   if (Op->isStrictFPOpcode())
8438     return false;
8439 
8440   SDLoc dl(Op);
8441   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8442                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8443   if (ET == ISD::NON_EXTLOAD &&
8444       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8445       isOperationLegalOrCustom(Op.getOpcode(),
8446                                Op.getOperand(0).getValueType())) {
8447 
8448     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8449     return true;
8450   }
8451 
8452   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8453   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8454       LD->isNonTemporal())
8455     return false;
8456   if (LD->getMemoryVT() != MemVT)
8457     return false;
8458 
8459   // If the result of the load is an illegal type, then we can't build a
8460   // valid chain for reuse since the legalised loads and token factor node that
8461   // ties the legalised loads together uses a different output chain then the
8462   // illegal load.
8463   if (!isTypeLegal(LD->getValueType(0)))
8464     return false;
8465 
8466   RLI.Ptr = LD->getBasePtr();
8467   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8468     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8469            "Non-pre-inc AM on PPC?");
8470     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8471                           LD->getOffset());
8472   }
8473 
8474   RLI.Chain = LD->getChain();
8475   RLI.MPI = LD->getPointerInfo();
8476   RLI.IsDereferenceable = LD->isDereferenceable();
8477   RLI.IsInvariant = LD->isInvariant();
8478   RLI.Alignment = LD->getAlign();
8479   RLI.AAInfo = LD->getAAInfo();
8480   RLI.Ranges = LD->getRanges();
8481 
8482   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8483   return true;
8484 }
8485 
8486 // Given the head of the old chain, ResChain, insert a token factor containing
8487 // it and NewResChain, and make users of ResChain now be users of that token
8488 // factor.
8489 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8490 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8491                                         SDValue NewResChain,
8492                                         SelectionDAG &DAG) const {
8493   if (!ResChain)
8494     return;
8495 
8496   SDLoc dl(NewResChain);
8497 
8498   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8499                            NewResChain, DAG.getUNDEF(MVT::Other));
8500   assert(TF.getNode() != NewResChain.getNode() &&
8501          "A new TF really is required here");
8502 
8503   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8504   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8505 }
8506 
8507 /// Analyze profitability of direct move
8508 /// prefer float load to int load plus direct move
8509 /// when there is no integer use of int load
8510 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8511   SDNode *Origin = Op.getOperand(0).getNode();
8512   if (Origin->getOpcode() != ISD::LOAD)
8513     return true;
8514 
8515   // If there is no LXSIBZX/LXSIHZX, like Power8,
8516   // prefer direct move if the memory size is 1 or 2 bytes.
8517   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8518   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8519     return true;
8520 
8521   for (SDNode::use_iterator UI = Origin->use_begin(),
8522                             UE = Origin->use_end();
8523        UI != UE; ++UI) {
8524 
8525     // Only look at the users of the loaded value.
8526     if (UI.getUse().get().getResNo() != 0)
8527       continue;
8528 
8529     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8530         UI->getOpcode() != ISD::UINT_TO_FP &&
8531         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8532         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8533       return true;
8534   }
8535 
8536   return false;
8537 }
8538 
8539 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8540                               const PPCSubtarget &Subtarget,
8541                               SDValue Chain = SDValue()) {
8542   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8543                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8544   SDLoc dl(Op);
8545 
8546   // TODO: Any other flags to propagate?
8547   SDNodeFlags Flags;
8548   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8549 
8550   // If we have FCFIDS, then use it when converting to single-precision.
8551   // Otherwise, convert to double-precision and then round.
8552   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8553   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8554                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8555   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8556   if (Op->isStrictFPOpcode()) {
8557     if (!Chain)
8558       Chain = Op.getOperand(0);
8559     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8560                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8561   } else
8562     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8563 }
8564 
8565 /// Custom lowers integer to floating point conversions to use
8566 /// the direct move instructions available in ISA 2.07 to avoid the
8567 /// need for load/store combinations.
8568 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8569                                                     SelectionDAG &DAG,
8570                                                     const SDLoc &dl) const {
8571   assert((Op.getValueType() == MVT::f32 ||
8572           Op.getValueType() == MVT::f64) &&
8573          "Invalid floating point type as target of conversion");
8574   assert(Subtarget.hasFPCVT() &&
8575          "Int to FP conversions with direct moves require FPCVT");
8576   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8577   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8578   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8579                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8580   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8581   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8582   return convertIntToFP(Op, Mov, DAG, Subtarget);
8583 }
8584 
8585 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8586 
8587   EVT VecVT = Vec.getValueType();
8588   assert(VecVT.isVector() && "Expected a vector type.");
8589   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8590 
8591   EVT EltVT = VecVT.getVectorElementType();
8592   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8593   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8594 
8595   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8596   SmallVector<SDValue, 16> Ops(NumConcat);
8597   Ops[0] = Vec;
8598   SDValue UndefVec = DAG.getUNDEF(VecVT);
8599   for (unsigned i = 1; i < NumConcat; ++i)
8600     Ops[i] = UndefVec;
8601 
8602   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8603 }
8604 
8605 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8606                                                 const SDLoc &dl) const {
8607   bool IsStrict = Op->isStrictFPOpcode();
8608   unsigned Opc = Op.getOpcode();
8609   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8610   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8611           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8612          "Unexpected conversion type");
8613   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8614          "Supports conversions to v2f64/v4f32 only.");
8615 
8616   // TODO: Any other flags to propagate?
8617   SDNodeFlags Flags;
8618   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8619 
8620   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8621   bool FourEltRes = Op.getValueType() == MVT::v4f32;
8622 
8623   SDValue Wide = widenVec(DAG, Src, dl);
8624   EVT WideVT = Wide.getValueType();
8625   unsigned WideNumElts = WideVT.getVectorNumElements();
8626   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8627 
8628   SmallVector<int, 16> ShuffV;
8629   for (unsigned i = 0; i < WideNumElts; ++i)
8630     ShuffV.push_back(i + WideNumElts);
8631 
8632   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8633   int SaveElts = FourEltRes ? 4 : 2;
8634   if (Subtarget.isLittleEndian())
8635     for (int i = 0; i < SaveElts; i++)
8636       ShuffV[i * Stride] = i;
8637   else
8638     for (int i = 1; i <= SaveElts; i++)
8639       ShuffV[i * Stride - 1] = i - 1;
8640 
8641   SDValue ShuffleSrc2 =
8642       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8643   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8644 
8645   SDValue Extend;
8646   if (SignedConv) {
8647     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8648     EVT ExtVT = Src.getValueType();
8649     if (Subtarget.hasP9Altivec())
8650       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8651                                IntermediateVT.getVectorNumElements());
8652 
8653     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8654                          DAG.getValueType(ExtVT));
8655   } else
8656     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8657 
8658   if (IsStrict)
8659     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8660                        {Op.getOperand(0), Extend}, Flags);
8661 
8662   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8663 }
8664 
8665 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8666                                           SelectionDAG &DAG) const {
8667   SDLoc dl(Op);
8668   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8669                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8670   bool IsStrict = Op->isStrictFPOpcode();
8671   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8672   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8673 
8674   // TODO: Any other flags to propagate?
8675   SDNodeFlags Flags;
8676   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8677 
8678   EVT InVT = Src.getValueType();
8679   EVT OutVT = Op.getValueType();
8680   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8681       isOperationCustom(Op.getOpcode(), InVT))
8682     return LowerINT_TO_FPVector(Op, DAG, dl);
8683 
8684   // Conversions to f128 are legal.
8685   if (Op.getValueType() == MVT::f128)
8686     return Op;
8687 
8688   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8689   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8690     return SDValue();
8691 
8692   if (Src.getValueType() == MVT::i1)
8693     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8694                        DAG.getConstantFP(1.0, dl, Op.getValueType()),
8695                        DAG.getConstantFP(0.0, dl, Op.getValueType()));
8696 
8697   // If we have direct moves, we can do all the conversion, skip the store/load
8698   // however, without FPCVT we can't do most conversions.
8699   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8700       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8701     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8702 
8703   assert((IsSigned || Subtarget.hasFPCVT()) &&
8704          "UINT_TO_FP is supported only with FPCVT");
8705 
8706   if (Src.getValueType() == MVT::i64) {
8707     SDValue SINT = Src;
8708     // When converting to single-precision, we actually need to convert
8709     // to double-precision first and then round to single-precision.
8710     // To avoid double-rounding effects during that operation, we have
8711     // to prepare the input operand.  Bits that might be truncated when
8712     // converting to double-precision are replaced by a bit that won't
8713     // be lost at this stage, but is below the single-precision rounding
8714     // position.
8715     //
8716     // However, if -enable-unsafe-fp-math is in effect, accept double
8717     // rounding to avoid the extra overhead.
8718     if (Op.getValueType() == MVT::f32 &&
8719         !Subtarget.hasFPCVT() &&
8720         !DAG.getTarget().Options.UnsafeFPMath) {
8721 
8722       // Twiddle input to make sure the low 11 bits are zero.  (If this
8723       // is the case, we are guaranteed the value will fit into the 53 bit
8724       // mantissa of an IEEE double-precision value without rounding.)
8725       // If any of those low 11 bits were not zero originally, make sure
8726       // bit 12 (value 2048) is set instead, so that the final rounding
8727       // to single-precision gets the correct result.
8728       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8729                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8730       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8731                           Round, DAG.getConstant(2047, dl, MVT::i64));
8732       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8733       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8734                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8735 
8736       // However, we cannot use that value unconditionally: if the magnitude
8737       // of the input value is small, the bit-twiddling we did above might
8738       // end up visibly changing the output.  Fortunately, in that case, we
8739       // don't need to twiddle bits since the original input will convert
8740       // exactly to double-precision floating-point already.  Therefore,
8741       // construct a conditional to use the original value if the top 11
8742       // bits are all sign-bit copies, and use the rounded value computed
8743       // above otherwise.
8744       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8745                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8746       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8747                          Cond, DAG.getConstant(1, dl, MVT::i64));
8748       Cond = DAG.getSetCC(
8749           dl,
8750           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8751           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8752 
8753       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8754     }
8755 
8756     ReuseLoadInfo RLI;
8757     SDValue Bits;
8758 
8759     MachineFunction &MF = DAG.getMachineFunction();
8760     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8761       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8762                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8763       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8764     } else if (Subtarget.hasLFIWAX() &&
8765                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8766       MachineMemOperand *MMO =
8767         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8768                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8769       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8770       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8771                                      DAG.getVTList(MVT::f64, MVT::Other),
8772                                      Ops, MVT::i32, MMO);
8773       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8774     } else if (Subtarget.hasFPCVT() &&
8775                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8776       MachineMemOperand *MMO =
8777         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8778                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8779       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8780       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8781                                      DAG.getVTList(MVT::f64, MVT::Other),
8782                                      Ops, MVT::i32, MMO);
8783       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8784     } else if (((Subtarget.hasLFIWAX() &&
8785                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8786                 (Subtarget.hasFPCVT() &&
8787                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8788                SINT.getOperand(0).getValueType() == MVT::i32) {
8789       MachineFrameInfo &MFI = MF.getFrameInfo();
8790       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8791 
8792       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8793       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8794 
8795       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8796                                    MachinePointerInfo::getFixedStack(
8797                                        DAG.getMachineFunction(), FrameIdx));
8798       Chain = Store;
8799 
8800       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8801              "Expected an i32 store");
8802 
8803       RLI.Ptr = FIdx;
8804       RLI.Chain = Chain;
8805       RLI.MPI =
8806           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8807       RLI.Alignment = Align(4);
8808 
8809       MachineMemOperand *MMO =
8810         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8811                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8812       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8813       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8814                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8815                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8816                                      Ops, MVT::i32, MMO);
8817       Chain = Bits.getValue(1);
8818     } else
8819       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8820 
8821     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8822     if (IsStrict)
8823       Chain = FP.getValue(1);
8824 
8825     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8826       if (IsStrict)
8827         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8828                          DAG.getVTList(MVT::f32, MVT::Other),
8829                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8830       else
8831         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8832                          DAG.getIntPtrConstant(0, dl));
8833     }
8834     return FP;
8835   }
8836 
8837   assert(Src.getValueType() == MVT::i32 &&
8838          "Unhandled INT_TO_FP type in custom expander!");
8839   // Since we only generate this in 64-bit mode, we can take advantage of
8840   // 64-bit registers.  In particular, sign extend the input value into the
8841   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8842   // then lfd it and fcfid it.
8843   MachineFunction &MF = DAG.getMachineFunction();
8844   MachineFrameInfo &MFI = MF.getFrameInfo();
8845   EVT PtrVT = getPointerTy(MF.getDataLayout());
8846 
8847   SDValue Ld;
8848   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8849     ReuseLoadInfo RLI;
8850     bool ReusingLoad;
8851     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8852       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8853       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8854 
8855       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8856                                    MachinePointerInfo::getFixedStack(
8857                                        DAG.getMachineFunction(), FrameIdx));
8858       Chain = Store;
8859 
8860       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8861              "Expected an i32 store");
8862 
8863       RLI.Ptr = FIdx;
8864       RLI.Chain = Chain;
8865       RLI.MPI =
8866           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8867       RLI.Alignment = Align(4);
8868     }
8869 
8870     MachineMemOperand *MMO =
8871       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8872                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8873     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8874     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8875                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8876                                  MVT::i32, MMO);
8877     Chain = Ld.getValue(1);
8878     if (ReusingLoad)
8879       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8880   } else {
8881     assert(Subtarget.isPPC64() &&
8882            "i32->FP without LFIWAX supported only on PPC64");
8883 
8884     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8885     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8886 
8887     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8888 
8889     // STD the extended value into the stack slot.
8890     SDValue Store = DAG.getStore(
8891         Chain, dl, Ext64, FIdx,
8892         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8893     Chain = Store;
8894 
8895     // Load the value as a double.
8896     Ld = DAG.getLoad(
8897         MVT::f64, dl, Chain, FIdx,
8898         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8899     Chain = Ld.getValue(1);
8900   }
8901 
8902   // FCFID it and return it.
8903   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8904   if (IsStrict)
8905     Chain = FP.getValue(1);
8906   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8907     if (IsStrict)
8908       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8909                        DAG.getVTList(MVT::f32, MVT::Other),
8910                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8911     else
8912       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8913                        DAG.getIntPtrConstant(0, dl));
8914   }
8915   return FP;
8916 }
8917 
8918 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8919                                             SelectionDAG &DAG) const {
8920   SDLoc dl(Op);
8921   /*
8922    The rounding mode is in bits 30:31 of FPSR, and has the following
8923    settings:
8924      00 Round to nearest
8925      01 Round to 0
8926      10 Round to +inf
8927      11 Round to -inf
8928 
8929   FLT_ROUNDS, on the other hand, expects the following:
8930     -1 Undefined
8931      0 Round to 0
8932      1 Round to nearest
8933      2 Round to +inf
8934      3 Round to -inf
8935 
8936   To perform the conversion, we do:
8937     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8938   */
8939 
8940   MachineFunction &MF = DAG.getMachineFunction();
8941   EVT VT = Op.getValueType();
8942   EVT PtrVT = getPointerTy(MF.getDataLayout());
8943 
8944   // Save FP Control Word to register
8945   SDValue Chain = Op.getOperand(0);
8946   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8947   Chain = MFFS.getValue(1);
8948 
8949   // Save FP register to stack slot
8950   int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8951   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8952   Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8953 
8954   // Load FP Control Word from low 32 bits of stack slot.
8955   SDValue Four = DAG.getConstant(4, dl, PtrVT);
8956   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8957   SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8958   Chain = CWD.getValue(1);
8959 
8960   // Transform as necessary
8961   SDValue CWD1 =
8962     DAG.getNode(ISD::AND, dl, MVT::i32,
8963                 CWD, DAG.getConstant(3, dl, MVT::i32));
8964   SDValue CWD2 =
8965     DAG.getNode(ISD::SRL, dl, MVT::i32,
8966                 DAG.getNode(ISD::AND, dl, MVT::i32,
8967                             DAG.getNode(ISD::XOR, dl, MVT::i32,
8968                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
8969                             DAG.getConstant(3, dl, MVT::i32)),
8970                 DAG.getConstant(1, dl, MVT::i32));
8971 
8972   SDValue RetVal =
8973     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8974 
8975   RetVal =
8976       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8977                   dl, VT, RetVal);
8978 
8979   return DAG.getMergeValues({RetVal, Chain}, dl);
8980 }
8981 
8982 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8983   EVT VT = Op.getValueType();
8984   unsigned BitWidth = VT.getSizeInBits();
8985   SDLoc dl(Op);
8986   assert(Op.getNumOperands() == 3 &&
8987          VT == Op.getOperand(1).getValueType() &&
8988          "Unexpected SHL!");
8989 
8990   // Expand into a bunch of logical ops.  Note that these ops
8991   // depend on the PPC behavior for oversized shift amounts.
8992   SDValue Lo = Op.getOperand(0);
8993   SDValue Hi = Op.getOperand(1);
8994   SDValue Amt = Op.getOperand(2);
8995   EVT AmtVT = Amt.getValueType();
8996 
8997   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8998                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8999   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9000   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9001   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9002   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9003                              DAG.getConstant(-BitWidth, dl, AmtVT));
9004   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9005   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9006   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9007   SDValue OutOps[] = { OutLo, OutHi };
9008   return DAG.getMergeValues(OutOps, dl);
9009 }
9010 
9011 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9012   EVT VT = Op.getValueType();
9013   SDLoc dl(Op);
9014   unsigned BitWidth = VT.getSizeInBits();
9015   assert(Op.getNumOperands() == 3 &&
9016          VT == Op.getOperand(1).getValueType() &&
9017          "Unexpected SRL!");
9018 
9019   // Expand into a bunch of logical ops.  Note that these ops
9020   // depend on the PPC behavior for oversized shift amounts.
9021   SDValue Lo = Op.getOperand(0);
9022   SDValue Hi = Op.getOperand(1);
9023   SDValue Amt = Op.getOperand(2);
9024   EVT AmtVT = Amt.getValueType();
9025 
9026   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9027                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9028   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9029   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9030   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9031   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9032                              DAG.getConstant(-BitWidth, dl, AmtVT));
9033   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9034   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9035   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9036   SDValue OutOps[] = { OutLo, OutHi };
9037   return DAG.getMergeValues(OutOps, dl);
9038 }
9039 
9040 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9041   SDLoc dl(Op);
9042   EVT VT = Op.getValueType();
9043   unsigned BitWidth = VT.getSizeInBits();
9044   assert(Op.getNumOperands() == 3 &&
9045          VT == Op.getOperand(1).getValueType() &&
9046          "Unexpected SRA!");
9047 
9048   // Expand into a bunch of logical ops, followed by a select_cc.
9049   SDValue Lo = Op.getOperand(0);
9050   SDValue Hi = Op.getOperand(1);
9051   SDValue Amt = Op.getOperand(2);
9052   EVT AmtVT = Amt.getValueType();
9053 
9054   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9055                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9056   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9057   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9058   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9059   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9060                              DAG.getConstant(-BitWidth, dl, AmtVT));
9061   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9062   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9063   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9064                                   Tmp4, Tmp6, ISD::SETLE);
9065   SDValue OutOps[] = { OutLo, OutHi };
9066   return DAG.getMergeValues(OutOps, dl);
9067 }
9068 
9069 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9070                                             SelectionDAG &DAG) const {
9071   SDLoc dl(Op);
9072   EVT VT = Op.getValueType();
9073   unsigned BitWidth = VT.getSizeInBits();
9074 
9075   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9076   SDValue X = Op.getOperand(0);
9077   SDValue Y = Op.getOperand(1);
9078   SDValue Z = Op.getOperand(2);
9079   EVT AmtVT = Z.getValueType();
9080 
9081   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9082   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9083   // This is simpler than TargetLowering::expandFunnelShift because we can rely
9084   // on PowerPC shift by BW being well defined.
9085   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9086                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
9087   SDValue SubZ =
9088       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9089   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9090   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9091   return DAG.getNode(ISD::OR, dl, VT, X, Y);
9092 }
9093 
9094 //===----------------------------------------------------------------------===//
9095 // Vector related lowering.
9096 //
9097 
9098 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9099 /// element size of SplatSize. Cast the result to VT.
9100 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9101                                       SelectionDAG &DAG, const SDLoc &dl) {
9102   static const MVT VTys[] = { // canonical VT to use for each size.
9103     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9104   };
9105 
9106   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9107 
9108   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9109   if (Val == ((1LU << (SplatSize * 8)) - 1)) {
9110     SplatSize = 1;
9111     Val = 0xFF;
9112   }
9113 
9114   EVT CanonicalVT = VTys[SplatSize-1];
9115 
9116   // Build a canonical splat for this value.
9117   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9118 }
9119 
9120 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9121 /// specified intrinsic ID.
9122 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9123                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9124   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9125   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9126                      DAG.getConstant(IID, dl, MVT::i32), Op);
9127 }
9128 
9129 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9130 /// specified intrinsic ID.
9131 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9132                                 SelectionDAG &DAG, const SDLoc &dl,
9133                                 EVT DestVT = MVT::Other) {
9134   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9135   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9136                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9137 }
9138 
9139 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9140 /// specified intrinsic ID.
9141 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9142                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9143                                 EVT DestVT = MVT::Other) {
9144   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9145   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9146                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9147 }
9148 
9149 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9150 /// amount.  The result has the specified value type.
9151 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9152                            SelectionDAG &DAG, const SDLoc &dl) {
9153   // Force LHS/RHS to be the right type.
9154   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9155   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9156 
9157   int Ops[16];
9158   for (unsigned i = 0; i != 16; ++i)
9159     Ops[i] = i + Amt;
9160   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9161   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9162 }
9163 
9164 /// Do we have an efficient pattern in a .td file for this node?
9165 ///
9166 /// \param V - pointer to the BuildVectorSDNode being matched
9167 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9168 ///
9169 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9170 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9171 /// the opposite is true (expansion is beneficial) are:
9172 /// - The node builds a vector out of integers that are not 32 or 64-bits
9173 /// - The node builds a vector out of constants
9174 /// - The node is a "load-and-splat"
9175 /// In all other cases, we will choose to keep the BUILD_VECTOR.
9176 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9177                                             bool HasDirectMove,
9178                                             bool HasP8Vector) {
9179   EVT VecVT = V->getValueType(0);
9180   bool RightType = VecVT == MVT::v2f64 ||
9181     (HasP8Vector && VecVT == MVT::v4f32) ||
9182     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9183   if (!RightType)
9184     return false;
9185 
9186   bool IsSplat = true;
9187   bool IsLoad = false;
9188   SDValue Op0 = V->getOperand(0);
9189 
9190   // This function is called in a block that confirms the node is not a constant
9191   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9192   // different constants.
9193   if (V->isConstant())
9194     return false;
9195   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9196     if (V->getOperand(i).isUndef())
9197       return false;
9198     // We want to expand nodes that represent load-and-splat even if the
9199     // loaded value is a floating point truncation or conversion to int.
9200     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9201         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9202          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9203         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9204          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9205         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9206          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9207       IsLoad = true;
9208     // If the operands are different or the input is not a load and has more
9209     // uses than just this BV node, then it isn't a splat.
9210     if (V->getOperand(i) != Op0 ||
9211         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9212       IsSplat = false;
9213   }
9214   return !(IsSplat && IsLoad);
9215 }
9216 
9217 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9218 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9219 
9220   SDLoc dl(Op);
9221   SDValue Op0 = Op->getOperand(0);
9222 
9223   if ((Op.getValueType() != MVT::f128) ||
9224       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9225       (Op0.getOperand(0).getValueType() != MVT::i64) ||
9226       (Op0.getOperand(1).getValueType() != MVT::i64))
9227     return SDValue();
9228 
9229   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9230                      Op0.getOperand(1));
9231 }
9232 
9233 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9234   const SDValue *InputLoad = &Op;
9235   if (InputLoad->getOpcode() == ISD::BITCAST)
9236     InputLoad = &InputLoad->getOperand(0);
9237   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9238       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9239     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9240     InputLoad = &InputLoad->getOperand(0);
9241   }
9242   if (InputLoad->getOpcode() != ISD::LOAD)
9243     return nullptr;
9244   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9245   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9246 }
9247 
9248 // Convert the argument APFloat to a single precision APFloat if there is no
9249 // loss in information during the conversion to single precision APFloat and the
9250 // resulting number is not a denormal number. Return true if successful.
9251 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9252   APFloat APFloatToConvert = ArgAPFloat;
9253   bool LosesInfo = true;
9254   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9255                            &LosesInfo);
9256   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9257   if (Success)
9258     ArgAPFloat = APFloatToConvert;
9259   return Success;
9260 }
9261 
9262 // Bitcast the argument APInt to a double and convert it to a single precision
9263 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9264 // argument if there is no loss in information during the conversion from
9265 // double to single precision APFloat and the resulting number is not a denormal
9266 // number. Return true if successful.
9267 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9268   double DpValue = ArgAPInt.bitsToDouble();
9269   APFloat APFloatDp(DpValue);
9270   bool Success = convertToNonDenormSingle(APFloatDp);
9271   if (Success)
9272     ArgAPInt = APFloatDp.bitcastToAPInt();
9273   return Success;
9274 }
9275 
9276 // If this is a case we can't handle, return null and let the default
9277 // expansion code take care of it.  If we CAN select this case, and if it
9278 // selects to a single instruction, return Op.  Otherwise, if we can codegen
9279 // this case more efficiently than a constant pool load, lower it to the
9280 // sequence of ops that should be used.
9281 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9282                                              SelectionDAG &DAG) const {
9283   SDLoc dl(Op);
9284   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9285   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9286 
9287   // Check if this is a splat of a constant value.
9288   APInt APSplatBits, APSplatUndef;
9289   unsigned SplatBitSize;
9290   bool HasAnyUndefs;
9291   bool BVNIsConstantSplat =
9292       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9293                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9294 
9295   // If it is a splat of a double, check if we can shrink it to a 32 bit
9296   // non-denormal float which when converted back to double gives us the same
9297   // double. This is to exploit the XXSPLTIDP instruction.
9298   if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
9299       (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
9300       convertToNonDenormSingle(APSplatBits)) {
9301     SDValue SplatNode = DAG.getNode(
9302         PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9303         DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9304     return DAG.getBitcast(Op.getValueType(), SplatNode);
9305   }
9306 
9307   if (!BVNIsConstantSplat || SplatBitSize > 32) {
9308 
9309     bool IsPermutedLoad = false;
9310     const SDValue *InputLoad =
9311         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
9312     // Handle load-and-splat patterns as we have instructions that will do this
9313     // in one go.
9314     if (InputLoad && DAG.isSplatValue(Op, true)) {
9315       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9316 
9317       // We have handling for 4 and 8 byte elements.
9318       unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
9319 
9320       // Checking for a single use of this load, we have to check for vector
9321       // width (128 bits) / ElementSize uses (since each operand of the
9322       // BUILD_VECTOR is a separate use of the value.
9323       unsigned NumUsesOfInputLD = 128 / ElementSize;
9324       for (SDValue BVInOp : Op->ops())
9325         if (BVInOp.isUndef())
9326           NumUsesOfInputLD--;
9327       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9328       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9329           ((Subtarget.hasVSX() && ElementSize == 64) ||
9330            (Subtarget.hasP9Vector() && ElementSize == 32))) {
9331         SDValue Ops[] = {
9332           LD->getChain(),    // Chain
9333           LD->getBasePtr(),  // Ptr
9334           DAG.getValueType(Op.getValueType()) // VT
9335         };
9336         SDValue LdSplt = DAG.getMemIntrinsicNode(
9337             PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
9338             Ops, LD->getMemoryVT(), LD->getMemOperand());
9339         // Replace all uses of the output chain of the original load with the
9340         // output chain of the new load.
9341         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9342                                       LdSplt.getValue(1));
9343         return LdSplt;
9344       }
9345     }
9346 
9347     // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
9348     // lowered to VSX instructions under certain conditions.
9349     // Without VSX, there is no pattern more efficient than expanding the node.
9350     if (Subtarget.hasVSX() &&
9351         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9352                                         Subtarget.hasP8Vector()))
9353       return Op;
9354     return SDValue();
9355   }
9356 
9357   uint64_t SplatBits = APSplatBits.getZExtValue();
9358   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9359   unsigned SplatSize = SplatBitSize / 8;
9360 
9361   // First, handle single instruction cases.
9362 
9363   // All zeros?
9364   if (SplatBits == 0) {
9365     // Canonicalize all zero vectors to be v4i32.
9366     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9367       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9368       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9369     }
9370     return Op;
9371   }
9372 
9373   // We have XXSPLTIW for constant splats four bytes wide.
9374   // Given vector length is a multiple of 4, 2-byte splats can be replaced
9375   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9376   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9377   // turned into a 4-byte splat of 0xABABABAB.
9378   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9379     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9380                                   Op.getValueType(), DAG, dl);
9381 
9382   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9383     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9384                                   dl);
9385 
9386   // We have XXSPLTIB for constant splats one byte wide.
9387   if (Subtarget.hasP9Vector() && SplatSize == 1)
9388     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9389                                   dl);
9390 
9391   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9392   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9393                     (32-SplatBitSize));
9394   if (SextVal >= -16 && SextVal <= 15)
9395     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9396                                   dl);
9397 
9398   // Two instruction sequences.
9399 
9400   // If this value is in the range [-32,30] and is even, use:
9401   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9402   // If this value is in the range [17,31] and is odd, use:
9403   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9404   // If this value is in the range [-31,-17] and is odd, use:
9405   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9406   // Note the last two are three-instruction sequences.
9407   if (SextVal >= -32 && SextVal <= 31) {
9408     // To avoid having these optimizations undone by constant folding,
9409     // we convert to a pseudo that will be expanded later into one of
9410     // the above forms.
9411     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9412     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9413               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9414     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9415     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9416     if (VT == Op.getValueType())
9417       return RetVal;
9418     else
9419       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9420   }
9421 
9422   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9423   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9424   // for fneg/fabs.
9425   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9426     // Make -1 and vspltisw -1:
9427     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9428 
9429     // Make the VSLW intrinsic, computing 0x8000_0000.
9430     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9431                                    OnesV, DAG, dl);
9432 
9433     // xor by OnesV to invert it.
9434     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9435     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9436   }
9437 
9438   // Check to see if this is a wide variety of vsplti*, binop self cases.
9439   static const signed char SplatCsts[] = {
9440     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9441     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9442   };
9443 
9444   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9445     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9446     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
9447     int i = SplatCsts[idx];
9448 
9449     // Figure out what shift amount will be used by altivec if shifted by i in
9450     // this splat size.
9451     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9452 
9453     // vsplti + shl self.
9454     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9455       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9456       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9457         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9458         Intrinsic::ppc_altivec_vslw
9459       };
9460       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9461       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9462     }
9463 
9464     // vsplti + srl self.
9465     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9466       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9467       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9468         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9469         Intrinsic::ppc_altivec_vsrw
9470       };
9471       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9472       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9473     }
9474 
9475     // vsplti + sra self.
9476     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9477       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9478       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9479         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
9480         Intrinsic::ppc_altivec_vsraw
9481       };
9482       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9483       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9484     }
9485 
9486     // vsplti + rol self.
9487     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9488                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9489       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9490       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9491         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9492         Intrinsic::ppc_altivec_vrlw
9493       };
9494       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9495       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9496     }
9497 
9498     // t = vsplti c, result = vsldoi t, t, 1
9499     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9500       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9501       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9502       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9503     }
9504     // t = vsplti c, result = vsldoi t, t, 2
9505     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9506       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9507       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9508       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9509     }
9510     // t = vsplti c, result = vsldoi t, t, 3
9511     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9512       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9513       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9514       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9515     }
9516   }
9517 
9518   return SDValue();
9519 }
9520 
9521 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9522 /// the specified operations to build the shuffle.
9523 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9524                                       SDValue RHS, SelectionDAG &DAG,
9525                                       const SDLoc &dl) {
9526   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9527   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9528   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9529 
9530   enum {
9531     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9532     OP_VMRGHW,
9533     OP_VMRGLW,
9534     OP_VSPLTISW0,
9535     OP_VSPLTISW1,
9536     OP_VSPLTISW2,
9537     OP_VSPLTISW3,
9538     OP_VSLDOI4,
9539     OP_VSLDOI8,
9540     OP_VSLDOI12
9541   };
9542 
9543   if (OpNum == OP_COPY) {
9544     if (LHSID == (1*9+2)*9+3) return LHS;
9545     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9546     return RHS;
9547   }
9548 
9549   SDValue OpLHS, OpRHS;
9550   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9551   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9552 
9553   int ShufIdxs[16];
9554   switch (OpNum) {
9555   default: llvm_unreachable("Unknown i32 permute!");
9556   case OP_VMRGHW:
9557     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9558     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9559     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9560     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9561     break;
9562   case OP_VMRGLW:
9563     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9564     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9565     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9566     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9567     break;
9568   case OP_VSPLTISW0:
9569     for (unsigned i = 0; i != 16; ++i)
9570       ShufIdxs[i] = (i&3)+0;
9571     break;
9572   case OP_VSPLTISW1:
9573     for (unsigned i = 0; i != 16; ++i)
9574       ShufIdxs[i] = (i&3)+4;
9575     break;
9576   case OP_VSPLTISW2:
9577     for (unsigned i = 0; i != 16; ++i)
9578       ShufIdxs[i] = (i&3)+8;
9579     break;
9580   case OP_VSPLTISW3:
9581     for (unsigned i = 0; i != 16; ++i)
9582       ShufIdxs[i] = (i&3)+12;
9583     break;
9584   case OP_VSLDOI4:
9585     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9586   case OP_VSLDOI8:
9587     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9588   case OP_VSLDOI12:
9589     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9590   }
9591   EVT VT = OpLHS.getValueType();
9592   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9593   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9594   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9595   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9596 }
9597 
9598 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9599 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9600 /// SDValue.
9601 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9602                                            SelectionDAG &DAG) const {
9603   const unsigned BytesInVector = 16;
9604   bool IsLE = Subtarget.isLittleEndian();
9605   SDLoc dl(N);
9606   SDValue V1 = N->getOperand(0);
9607   SDValue V2 = N->getOperand(1);
9608   unsigned ShiftElts = 0, InsertAtByte = 0;
9609   bool Swap = false;
9610 
9611   // Shifts required to get the byte we want at element 7.
9612   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9613                                    0, 15, 14, 13, 12, 11, 10, 9};
9614   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9615                                 1, 2,  3,  4,  5,  6,  7,  8};
9616 
9617   ArrayRef<int> Mask = N->getMask();
9618   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9619 
9620   // For each mask element, find out if we're just inserting something
9621   // from V2 into V1 or vice versa.
9622   // Possible permutations inserting an element from V2 into V1:
9623   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9624   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9625   //   ...
9626   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9627   // Inserting from V1 into V2 will be similar, except mask range will be
9628   // [16,31].
9629 
9630   bool FoundCandidate = false;
9631   // If both vector operands for the shuffle are the same vector, the mask
9632   // will contain only elements from the first one and the second one will be
9633   // undef.
9634   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9635   // Go through the mask of half-words to find an element that's being moved
9636   // from one vector to the other.
9637   for (unsigned i = 0; i < BytesInVector; ++i) {
9638     unsigned CurrentElement = Mask[i];
9639     // If 2nd operand is undefined, we should only look for element 7 in the
9640     // Mask.
9641     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9642       continue;
9643 
9644     bool OtherElementsInOrder = true;
9645     // Examine the other elements in the Mask to see if they're in original
9646     // order.
9647     for (unsigned j = 0; j < BytesInVector; ++j) {
9648       if (j == i)
9649         continue;
9650       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
9651       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
9652       // in which we always assume we're always picking from the 1st operand.
9653       int MaskOffset =
9654           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9655       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9656         OtherElementsInOrder = false;
9657         break;
9658       }
9659     }
9660     // If other elements are in original order, we record the number of shifts
9661     // we need to get the element we want into element 7. Also record which byte
9662     // in the vector we should insert into.
9663     if (OtherElementsInOrder) {
9664       // If 2nd operand is undefined, we assume no shifts and no swapping.
9665       if (V2.isUndef()) {
9666         ShiftElts = 0;
9667         Swap = false;
9668       } else {
9669         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9670         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9671                          : BigEndianShifts[CurrentElement & 0xF];
9672         Swap = CurrentElement < BytesInVector;
9673       }
9674       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9675       FoundCandidate = true;
9676       break;
9677     }
9678   }
9679 
9680   if (!FoundCandidate)
9681     return SDValue();
9682 
9683   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9684   // optionally with VECSHL if shift is required.
9685   if (Swap)
9686     std::swap(V1, V2);
9687   if (V2.isUndef())
9688     V2 = V1;
9689   if (ShiftElts) {
9690     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9691                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9692     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9693                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9694   }
9695   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9696                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9697 }
9698 
9699 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9700 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9701 /// SDValue.
9702 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9703                                            SelectionDAG &DAG) const {
9704   const unsigned NumHalfWords = 8;
9705   const unsigned BytesInVector = NumHalfWords * 2;
9706   // Check that the shuffle is on half-words.
9707   if (!isNByteElemShuffleMask(N, 2, 1))
9708     return SDValue();
9709 
9710   bool IsLE = Subtarget.isLittleEndian();
9711   SDLoc dl(N);
9712   SDValue V1 = N->getOperand(0);
9713   SDValue V2 = N->getOperand(1);
9714   unsigned ShiftElts = 0, InsertAtByte = 0;
9715   bool Swap = false;
9716 
9717   // Shifts required to get the half-word we want at element 3.
9718   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9719   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9720 
9721   uint32_t Mask = 0;
9722   uint32_t OriginalOrderLow = 0x1234567;
9723   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9724   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9725   // 32-bit space, only need 4-bit nibbles per element.
9726   for (unsigned i = 0; i < NumHalfWords; ++i) {
9727     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9728     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9729   }
9730 
9731   // For each mask element, find out if we're just inserting something
9732   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9733   // from V2 into V1:
9734   //   X, 1, 2, 3, 4, 5, 6, 7
9735   //   0, X, 2, 3, 4, 5, 6, 7
9736   //   0, 1, X, 3, 4, 5, 6, 7
9737   //   0, 1, 2, X, 4, 5, 6, 7
9738   //   0, 1, 2, 3, X, 5, 6, 7
9739   //   0, 1, 2, 3, 4, X, 6, 7
9740   //   0, 1, 2, 3, 4, 5, X, 7
9741   //   0, 1, 2, 3, 4, 5, 6, X
9742   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9743 
9744   bool FoundCandidate = false;
9745   // Go through the mask of half-words to find an element that's being moved
9746   // from one vector to the other.
9747   for (unsigned i = 0; i < NumHalfWords; ++i) {
9748     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9749     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9750     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9751     uint32_t TargetOrder = 0x0;
9752 
9753     // If both vector operands for the shuffle are the same vector, the mask
9754     // will contain only elements from the first one and the second one will be
9755     // undef.
9756     if (V2.isUndef()) {
9757       ShiftElts = 0;
9758       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9759       TargetOrder = OriginalOrderLow;
9760       Swap = false;
9761       // Skip if not the correct element or mask of other elements don't equal
9762       // to our expected order.
9763       if (MaskOneElt == VINSERTHSrcElem &&
9764           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9765         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9766         FoundCandidate = true;
9767         break;
9768       }
9769     } else { // If both operands are defined.
9770       // Target order is [8,15] if the current mask is between [0,7].
9771       TargetOrder =
9772           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9773       // Skip if mask of other elements don't equal our expected order.
9774       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9775         // We only need the last 3 bits for the number of shifts.
9776         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9777                          : BigEndianShifts[MaskOneElt & 0x7];
9778         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9779         Swap = MaskOneElt < NumHalfWords;
9780         FoundCandidate = true;
9781         break;
9782       }
9783     }
9784   }
9785 
9786   if (!FoundCandidate)
9787     return SDValue();
9788 
9789   // Candidate found, construct the proper SDAG sequence with VINSERTH,
9790   // optionally with VECSHL if shift is required.
9791   if (Swap)
9792     std::swap(V1, V2);
9793   if (V2.isUndef())
9794     V2 = V1;
9795   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9796   if (ShiftElts) {
9797     // Double ShiftElts because we're left shifting on v16i8 type.
9798     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9799                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9800     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9801     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9802                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9803     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9804   }
9805   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9806   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9807                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
9808   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9809 }
9810 
9811 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9812 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9813 /// return the default SDValue.
9814 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9815                                               SelectionDAG &DAG) const {
9816   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9817   // to v16i8. Peek through the bitcasts to get the actual operands.
9818   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9819   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9820 
9821   auto ShuffleMask = SVN->getMask();
9822   SDValue VecShuffle(SVN, 0);
9823   SDLoc DL(SVN);
9824 
9825   // Check that we have a four byte shuffle.
9826   if (!isNByteElemShuffleMask(SVN, 4, 1))
9827     return SDValue();
9828 
9829   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9830   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9831     std::swap(LHS, RHS);
9832     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9833     ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9834   }
9835 
9836   // Ensure that the RHS is a vector of constants.
9837   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9838   if (!BVN)
9839     return SDValue();
9840 
9841   // Check if RHS is a splat of 4-bytes (or smaller).
9842   APInt APSplatValue, APSplatUndef;
9843   unsigned SplatBitSize;
9844   bool HasAnyUndefs;
9845   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9846                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9847       SplatBitSize > 32)
9848     return SDValue();
9849 
9850   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9851   // The instruction splats a constant C into two words of the source vector
9852   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9853   // Thus we check that the shuffle mask is the equivalent  of
9854   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9855   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9856   // within each word are consecutive, so we only need to check the first byte.
9857   SDValue Index;
9858   bool IsLE = Subtarget.isLittleEndian();
9859   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9860       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9861        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9862     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9863   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9864            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9865             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9866     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9867   else
9868     return SDValue();
9869 
9870   // If the splat is narrower than 32-bits, we need to get the 32-bit value
9871   // for XXSPLTI32DX.
9872   unsigned SplatVal = APSplatValue.getZExtValue();
9873   for (; SplatBitSize < 32; SplatBitSize <<= 1)
9874     SplatVal |= (SplatVal << SplatBitSize);
9875 
9876   SDValue SplatNode = DAG.getNode(
9877       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9878       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9879   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9880 }
9881 
9882 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9883 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9884 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9885 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9886 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9887   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9888   assert(Op.getValueType() == MVT::v1i128 &&
9889          "Only set v1i128 as custom, other type shouldn't reach here!");
9890   SDLoc dl(Op);
9891   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9892   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9893   unsigned SHLAmt = N1.getConstantOperandVal(0);
9894   if (SHLAmt % 8 == 0) {
9895     SmallVector<int, 16> Mask(16, 0);
9896     std::iota(Mask.begin(), Mask.end(), 0);
9897     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9898     if (SDValue Shuffle =
9899             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9900                                  DAG.getUNDEF(MVT::v16i8), Mask))
9901       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9902   }
9903   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9904   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9905                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9906   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9907                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9908   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9909   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9910 }
9911 
9912 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9913 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9914 /// return the code it can be lowered into.  Worst case, it can always be
9915 /// lowered into a vperm.
9916 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9917                                                SelectionDAG &DAG) const {
9918   SDLoc dl(Op);
9919   SDValue V1 = Op.getOperand(0);
9920   SDValue V2 = Op.getOperand(1);
9921   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9922 
9923   // Any nodes that were combined in the target-independent combiner prior
9924   // to vector legalization will not be sent to the target combine. Try to
9925   // combine it here.
9926   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9927     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9928       return NewShuffle;
9929     Op = NewShuffle;
9930     SVOp = cast<ShuffleVectorSDNode>(Op);
9931     V1 = Op.getOperand(0);
9932     V2 = Op.getOperand(1);
9933   }
9934   EVT VT = Op.getValueType();
9935   bool isLittleEndian = Subtarget.isLittleEndian();
9936 
9937   unsigned ShiftElts, InsertAtByte;
9938   bool Swap = false;
9939 
9940   // If this is a load-and-splat, we can do that with a single instruction
9941   // in some cases. However if the load has multiple uses, we don't want to
9942   // combine it because that will just produce multiple loads.
9943   bool IsPermutedLoad = false;
9944   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9945   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9946       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9947       InputLoad->hasOneUse()) {
9948     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9949     int SplatIdx =
9950       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9951 
9952     // The splat index for permuted loads will be in the left half of the vector
9953     // which is strictly wider than the loaded value by 8 bytes. So we need to
9954     // adjust the splat index to point to the correct address in memory.
9955     if (IsPermutedLoad) {
9956       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9957       SplatIdx += IsFourByte ? 2 : 1;
9958       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9959              "Splat of a value outside of the loaded memory");
9960     }
9961 
9962     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9963     // For 4-byte load-and-splat, we need Power9.
9964     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9965       uint64_t Offset = 0;
9966       if (IsFourByte)
9967         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9968       else
9969         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9970 
9971       SDValue BasePtr = LD->getBasePtr();
9972       if (Offset != 0)
9973         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9974                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9975       SDValue Ops[] = {
9976         LD->getChain(),    // Chain
9977         BasePtr,           // BasePtr
9978         DAG.getValueType(Op.getValueType()) // VT
9979       };
9980       SDVTList VTL =
9981         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9982       SDValue LdSplt =
9983         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9984                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
9985       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9986       if (LdSplt.getValueType() != SVOp->getValueType(0))
9987         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9988       return LdSplt;
9989     }
9990   }
9991   if (Subtarget.hasP9Vector() &&
9992       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9993                            isLittleEndian)) {
9994     if (Swap)
9995       std::swap(V1, V2);
9996     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9997     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9998     if (ShiftElts) {
9999       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10000                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
10001       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10002                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10003       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10004     }
10005     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10006                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10007     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10008   }
10009 
10010   if (Subtarget.hasPrefixInstrs()) {
10011     SDValue SplatInsertNode;
10012     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10013       return SplatInsertNode;
10014   }
10015 
10016   if (Subtarget.hasP9Altivec()) {
10017     SDValue NewISDNode;
10018     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10019       return NewISDNode;
10020 
10021     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10022       return NewISDNode;
10023   }
10024 
10025   if (Subtarget.hasVSX() &&
10026       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10027     if (Swap)
10028       std::swap(V1, V2);
10029     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10030     SDValue Conv2 =
10031         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10032 
10033     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10034                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10035     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10036   }
10037 
10038   if (Subtarget.hasVSX() &&
10039     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10040     if (Swap)
10041       std::swap(V1, V2);
10042     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10043     SDValue Conv2 =
10044         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10045 
10046     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10047                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10048     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10049   }
10050 
10051   if (Subtarget.hasP9Vector()) {
10052      if (PPC::isXXBRHShuffleMask(SVOp)) {
10053       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10054       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10055       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10056     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10057       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10058       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10059       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10060     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10061       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10062       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10063       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10064     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10065       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10066       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10067       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10068     }
10069   }
10070 
10071   if (Subtarget.hasVSX()) {
10072     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10073       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10074 
10075       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10076       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10077                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
10078       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10079     }
10080 
10081     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10082     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10083       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10084       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10085       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10086     }
10087   }
10088 
10089   // Cases that are handled by instructions that take permute immediates
10090   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10091   // selected by the instruction selector.
10092   if (V2.isUndef()) {
10093     if (PPC::isSplatShuffleMask(SVOp, 1) ||
10094         PPC::isSplatShuffleMask(SVOp, 2) ||
10095         PPC::isSplatShuffleMask(SVOp, 4) ||
10096         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10097         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10098         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10099         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10100         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10101         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10102         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10103         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10104         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10105         (Subtarget.hasP8Altivec() && (
10106          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10107          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10108          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10109       return Op;
10110     }
10111   }
10112 
10113   // Altivec has a variety of "shuffle immediates" that take two vector inputs
10114   // and produce a fixed permutation.  If any of these match, do not lower to
10115   // VPERM.
10116   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10117   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10118       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10119       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10120       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10121       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10122       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10123       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10124       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10125       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10126       (Subtarget.hasP8Altivec() && (
10127        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10128        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10129        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10130     return Op;
10131 
10132   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10133   // perfect shuffle table to emit an optimal matching sequence.
10134   ArrayRef<int> PermMask = SVOp->getMask();
10135 
10136   unsigned PFIndexes[4];
10137   bool isFourElementShuffle = true;
10138   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10139     unsigned EltNo = 8;   // Start out undef.
10140     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
10141       if (PermMask[i*4+j] < 0)
10142         continue;   // Undef, ignore it.
10143 
10144       unsigned ByteSource = PermMask[i*4+j];
10145       if ((ByteSource & 3) != j) {
10146         isFourElementShuffle = false;
10147         break;
10148       }
10149 
10150       if (EltNo == 8) {
10151         EltNo = ByteSource/4;
10152       } else if (EltNo != ByteSource/4) {
10153         isFourElementShuffle = false;
10154         break;
10155       }
10156     }
10157     PFIndexes[i] = EltNo;
10158   }
10159 
10160   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10161   // perfect shuffle vector to determine if it is cost effective to do this as
10162   // discrete instructions, or whether we should use a vperm.
10163   // For now, we skip this for little endian until such time as we have a
10164   // little-endian perfect shuffle table.
10165   if (isFourElementShuffle && !isLittleEndian) {
10166     // Compute the index in the perfect shuffle table.
10167     unsigned PFTableIndex =
10168       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10169 
10170     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10171     unsigned Cost  = (PFEntry >> 30);
10172 
10173     // Determining when to avoid vperm is tricky.  Many things affect the cost
10174     // of vperm, particularly how many times the perm mask needs to be computed.
10175     // For example, if the perm mask can be hoisted out of a loop or is already
10176     // used (perhaps because there are multiple permutes with the same shuffle
10177     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
10178     // the loop requires an extra register.
10179     //
10180     // As a compromise, we only emit discrete instructions if the shuffle can be
10181     // generated in 3 or fewer operations.  When we have loop information
10182     // available, if this block is within a loop, we should avoid using vperm
10183     // for 3-operation perms and use a constant pool load instead.
10184     if (Cost < 3)
10185       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10186   }
10187 
10188   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10189   // vector that will get spilled to the constant pool.
10190   if (V2.isUndef()) V2 = V1;
10191 
10192   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10193   // that it is in input element units, not in bytes.  Convert now.
10194 
10195   // For little endian, the order of the input vectors is reversed, and
10196   // the permutation mask is complemented with respect to 31.  This is
10197   // necessary to produce proper semantics with the big-endian-biased vperm
10198   // instruction.
10199   EVT EltVT = V1.getValueType().getVectorElementType();
10200   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10201 
10202   SmallVector<SDValue, 16> ResultMask;
10203   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10204     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10205 
10206     for (unsigned j = 0; j != BytesPerElement; ++j)
10207       if (isLittleEndian)
10208         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10209                                              dl, MVT::i32));
10210       else
10211         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10212                                              MVT::i32));
10213   }
10214 
10215   ShufflesHandledWithVPERM++;
10216   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10217   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10218   LLVM_DEBUG(SVOp->dump());
10219   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10220   LLVM_DEBUG(VPermMask.dump());
10221 
10222   if (isLittleEndian)
10223     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10224                        V2, V1, VPermMask);
10225   else
10226     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10227                        V1, V2, VPermMask);
10228 }
10229 
10230 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10231 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10232 /// information about the intrinsic.
10233 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10234                                  bool &isDot, const PPCSubtarget &Subtarget) {
10235   unsigned IntrinsicID =
10236       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10237   CompareOpc = -1;
10238   isDot = false;
10239   switch (IntrinsicID) {
10240   default:
10241     return false;
10242   // Comparison predicates.
10243   case Intrinsic::ppc_altivec_vcmpbfp_p:
10244     CompareOpc = 966;
10245     isDot = true;
10246     break;
10247   case Intrinsic::ppc_altivec_vcmpeqfp_p:
10248     CompareOpc = 198;
10249     isDot = true;
10250     break;
10251   case Intrinsic::ppc_altivec_vcmpequb_p:
10252     CompareOpc = 6;
10253     isDot = true;
10254     break;
10255   case Intrinsic::ppc_altivec_vcmpequh_p:
10256     CompareOpc = 70;
10257     isDot = true;
10258     break;
10259   case Intrinsic::ppc_altivec_vcmpequw_p:
10260     CompareOpc = 134;
10261     isDot = true;
10262     break;
10263   case Intrinsic::ppc_altivec_vcmpequd_p:
10264     if (Subtarget.hasP8Altivec()) {
10265       CompareOpc = 199;
10266       isDot = true;
10267     } else
10268       return false;
10269     break;
10270   case Intrinsic::ppc_altivec_vcmpneb_p:
10271   case Intrinsic::ppc_altivec_vcmpneh_p:
10272   case Intrinsic::ppc_altivec_vcmpnew_p:
10273   case Intrinsic::ppc_altivec_vcmpnezb_p:
10274   case Intrinsic::ppc_altivec_vcmpnezh_p:
10275   case Intrinsic::ppc_altivec_vcmpnezw_p:
10276     if (Subtarget.hasP9Altivec()) {
10277       switch (IntrinsicID) {
10278       default:
10279         llvm_unreachable("Unknown comparison intrinsic.");
10280       case Intrinsic::ppc_altivec_vcmpneb_p:
10281         CompareOpc = 7;
10282         break;
10283       case Intrinsic::ppc_altivec_vcmpneh_p:
10284         CompareOpc = 71;
10285         break;
10286       case Intrinsic::ppc_altivec_vcmpnew_p:
10287         CompareOpc = 135;
10288         break;
10289       case Intrinsic::ppc_altivec_vcmpnezb_p:
10290         CompareOpc = 263;
10291         break;
10292       case Intrinsic::ppc_altivec_vcmpnezh_p:
10293         CompareOpc = 327;
10294         break;
10295       case Intrinsic::ppc_altivec_vcmpnezw_p:
10296         CompareOpc = 391;
10297         break;
10298       }
10299       isDot = true;
10300     } else
10301       return false;
10302     break;
10303   case Intrinsic::ppc_altivec_vcmpgefp_p:
10304     CompareOpc = 454;
10305     isDot = true;
10306     break;
10307   case Intrinsic::ppc_altivec_vcmpgtfp_p:
10308     CompareOpc = 710;
10309     isDot = true;
10310     break;
10311   case Intrinsic::ppc_altivec_vcmpgtsb_p:
10312     CompareOpc = 774;
10313     isDot = true;
10314     break;
10315   case Intrinsic::ppc_altivec_vcmpgtsh_p:
10316     CompareOpc = 838;
10317     isDot = true;
10318     break;
10319   case Intrinsic::ppc_altivec_vcmpgtsw_p:
10320     CompareOpc = 902;
10321     isDot = true;
10322     break;
10323   case Intrinsic::ppc_altivec_vcmpgtsd_p:
10324     if (Subtarget.hasP8Altivec()) {
10325       CompareOpc = 967;
10326       isDot = true;
10327     } else
10328       return false;
10329     break;
10330   case Intrinsic::ppc_altivec_vcmpgtub_p:
10331     CompareOpc = 518;
10332     isDot = true;
10333     break;
10334   case Intrinsic::ppc_altivec_vcmpgtuh_p:
10335     CompareOpc = 582;
10336     isDot = true;
10337     break;
10338   case Intrinsic::ppc_altivec_vcmpgtuw_p:
10339     CompareOpc = 646;
10340     isDot = true;
10341     break;
10342   case Intrinsic::ppc_altivec_vcmpgtud_p:
10343     if (Subtarget.hasP8Altivec()) {
10344       CompareOpc = 711;
10345       isDot = true;
10346     } else
10347       return false;
10348     break;
10349 
10350   case Intrinsic::ppc_altivec_vcmpequq:
10351   case Intrinsic::ppc_altivec_vcmpgtsq:
10352   case Intrinsic::ppc_altivec_vcmpgtuq:
10353     if (!Subtarget.isISA3_1())
10354       return false;
10355     switch (IntrinsicID) {
10356     default:
10357       llvm_unreachable("Unknown comparison intrinsic.");
10358     case Intrinsic::ppc_altivec_vcmpequq:
10359       CompareOpc = 455;
10360       break;
10361     case Intrinsic::ppc_altivec_vcmpgtsq:
10362       CompareOpc = 903;
10363       break;
10364     case Intrinsic::ppc_altivec_vcmpgtuq:
10365       CompareOpc = 647;
10366       break;
10367     }
10368     break;
10369 
10370   // VSX predicate comparisons use the same infrastructure
10371   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10372   case Intrinsic::ppc_vsx_xvcmpgedp_p:
10373   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10374   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10375   case Intrinsic::ppc_vsx_xvcmpgesp_p:
10376   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10377     if (Subtarget.hasVSX()) {
10378       switch (IntrinsicID) {
10379       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10380         CompareOpc = 99;
10381         break;
10382       case Intrinsic::ppc_vsx_xvcmpgedp_p:
10383         CompareOpc = 115;
10384         break;
10385       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10386         CompareOpc = 107;
10387         break;
10388       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10389         CompareOpc = 67;
10390         break;
10391       case Intrinsic::ppc_vsx_xvcmpgesp_p:
10392         CompareOpc = 83;
10393         break;
10394       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10395         CompareOpc = 75;
10396         break;
10397       }
10398       isDot = true;
10399     } else
10400       return false;
10401     break;
10402 
10403   // Normal Comparisons.
10404   case Intrinsic::ppc_altivec_vcmpbfp:
10405     CompareOpc = 966;
10406     break;
10407   case Intrinsic::ppc_altivec_vcmpeqfp:
10408     CompareOpc = 198;
10409     break;
10410   case Intrinsic::ppc_altivec_vcmpequb:
10411     CompareOpc = 6;
10412     break;
10413   case Intrinsic::ppc_altivec_vcmpequh:
10414     CompareOpc = 70;
10415     break;
10416   case Intrinsic::ppc_altivec_vcmpequw:
10417     CompareOpc = 134;
10418     break;
10419   case Intrinsic::ppc_altivec_vcmpequd:
10420     if (Subtarget.hasP8Altivec())
10421       CompareOpc = 199;
10422     else
10423       return false;
10424     break;
10425   case Intrinsic::ppc_altivec_vcmpneb:
10426   case Intrinsic::ppc_altivec_vcmpneh:
10427   case Intrinsic::ppc_altivec_vcmpnew:
10428   case Intrinsic::ppc_altivec_vcmpnezb:
10429   case Intrinsic::ppc_altivec_vcmpnezh:
10430   case Intrinsic::ppc_altivec_vcmpnezw:
10431     if (Subtarget.hasP9Altivec())
10432       switch (IntrinsicID) {
10433       default:
10434         llvm_unreachable("Unknown comparison intrinsic.");
10435       case Intrinsic::ppc_altivec_vcmpneb:
10436         CompareOpc = 7;
10437         break;
10438       case Intrinsic::ppc_altivec_vcmpneh:
10439         CompareOpc = 71;
10440         break;
10441       case Intrinsic::ppc_altivec_vcmpnew:
10442         CompareOpc = 135;
10443         break;
10444       case Intrinsic::ppc_altivec_vcmpnezb:
10445         CompareOpc = 263;
10446         break;
10447       case Intrinsic::ppc_altivec_vcmpnezh:
10448         CompareOpc = 327;
10449         break;
10450       case Intrinsic::ppc_altivec_vcmpnezw:
10451         CompareOpc = 391;
10452         break;
10453       }
10454     else
10455       return false;
10456     break;
10457   case Intrinsic::ppc_altivec_vcmpgefp:
10458     CompareOpc = 454;
10459     break;
10460   case Intrinsic::ppc_altivec_vcmpgtfp:
10461     CompareOpc = 710;
10462     break;
10463   case Intrinsic::ppc_altivec_vcmpgtsb:
10464     CompareOpc = 774;
10465     break;
10466   case Intrinsic::ppc_altivec_vcmpgtsh:
10467     CompareOpc = 838;
10468     break;
10469   case Intrinsic::ppc_altivec_vcmpgtsw:
10470     CompareOpc = 902;
10471     break;
10472   case Intrinsic::ppc_altivec_vcmpgtsd:
10473     if (Subtarget.hasP8Altivec())
10474       CompareOpc = 967;
10475     else
10476       return false;
10477     break;
10478   case Intrinsic::ppc_altivec_vcmpgtub:
10479     CompareOpc = 518;
10480     break;
10481   case Intrinsic::ppc_altivec_vcmpgtuh:
10482     CompareOpc = 582;
10483     break;
10484   case Intrinsic::ppc_altivec_vcmpgtuw:
10485     CompareOpc = 646;
10486     break;
10487   case Intrinsic::ppc_altivec_vcmpgtud:
10488     if (Subtarget.hasP8Altivec())
10489       CompareOpc = 711;
10490     else
10491       return false;
10492     break;
10493   case Intrinsic::ppc_altivec_vcmpequq_p:
10494   case Intrinsic::ppc_altivec_vcmpgtsq_p:
10495   case Intrinsic::ppc_altivec_vcmpgtuq_p:
10496     if (!Subtarget.isISA3_1())
10497       return false;
10498     switch (IntrinsicID) {
10499     default:
10500       llvm_unreachable("Unknown comparison intrinsic.");
10501     case Intrinsic::ppc_altivec_vcmpequq_p:
10502       CompareOpc = 455;
10503       break;
10504     case Intrinsic::ppc_altivec_vcmpgtsq_p:
10505       CompareOpc = 903;
10506       break;
10507     case Intrinsic::ppc_altivec_vcmpgtuq_p:
10508       CompareOpc = 647;
10509       break;
10510     }
10511     isDot = true;
10512     break;
10513   }
10514   return true;
10515 }
10516 
10517 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10518 /// lower, do it, otherwise return null.
10519 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10520                                                    SelectionDAG &DAG) const {
10521   unsigned IntrinsicID =
10522     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10523 
10524   SDLoc dl(Op);
10525 
10526   switch (IntrinsicID) {
10527   case Intrinsic::thread_pointer:
10528     // Reads the thread pointer register, used for __builtin_thread_pointer.
10529     if (Subtarget.isPPC64())
10530       return DAG.getRegister(PPC::X13, MVT::i64);
10531     return DAG.getRegister(PPC::R2, MVT::i32);
10532 
10533   case Intrinsic::ppc_mma_disassemble_acc:
10534   case Intrinsic::ppc_mma_disassemble_pair: {
10535     int NumVecs = 2;
10536     SDValue WideVec = Op.getOperand(1);
10537     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10538       NumVecs = 4;
10539       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10540     }
10541     SmallVector<SDValue, 4> RetOps;
10542     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10543       SDValue Extract = DAG.getNode(
10544           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10545           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10546                                                      : VecNo,
10547                           dl, MVT::i64));
10548       RetOps.push_back(Extract);
10549     }
10550     return DAG.getMergeValues(RetOps, dl);
10551   }
10552   }
10553 
10554   // If this is a lowered altivec predicate compare, CompareOpc is set to the
10555   // opcode number of the comparison.
10556   int CompareOpc;
10557   bool isDot;
10558   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10559     return SDValue();    // Don't custom lower most intrinsics.
10560 
10561   // If this is a non-dot comparison, make the VCMP node and we are done.
10562   if (!isDot) {
10563     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10564                               Op.getOperand(1), Op.getOperand(2),
10565                               DAG.getConstant(CompareOpc, dl, MVT::i32));
10566     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10567   }
10568 
10569   // Create the PPCISD altivec 'dot' comparison node.
10570   SDValue Ops[] = {
10571     Op.getOperand(2),  // LHS
10572     Op.getOperand(3),  // RHS
10573     DAG.getConstant(CompareOpc, dl, MVT::i32)
10574   };
10575   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10576   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10577 
10578   // Now that we have the comparison, emit a copy from the CR to a GPR.
10579   // This is flagged to the above dot comparison.
10580   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10581                                 DAG.getRegister(PPC::CR6, MVT::i32),
10582                                 CompNode.getValue(1));
10583 
10584   // Unpack the result based on how the target uses it.
10585   unsigned BitNo;   // Bit # of CR6.
10586   bool InvertBit;   // Invert result?
10587   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10588   default:  // Can't happen, don't crash on invalid number though.
10589   case 0:   // Return the value of the EQ bit of CR6.
10590     BitNo = 0; InvertBit = false;
10591     break;
10592   case 1:   // Return the inverted value of the EQ bit of CR6.
10593     BitNo = 0; InvertBit = true;
10594     break;
10595   case 2:   // Return the value of the LT bit of CR6.
10596     BitNo = 2; InvertBit = false;
10597     break;
10598   case 3:   // Return the inverted value of the LT bit of CR6.
10599     BitNo = 2; InvertBit = true;
10600     break;
10601   }
10602 
10603   // Shift the bit into the low position.
10604   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10605                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10606   // Isolate the bit.
10607   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10608                       DAG.getConstant(1, dl, MVT::i32));
10609 
10610   // If we are supposed to, toggle the bit.
10611   if (InvertBit)
10612     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10613                         DAG.getConstant(1, dl, MVT::i32));
10614   return Flags;
10615 }
10616 
10617 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10618                                                SelectionDAG &DAG) const {
10619   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10620   // the beginning of the argument list.
10621   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10622   SDLoc DL(Op);
10623   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10624   case Intrinsic::ppc_cfence: {
10625     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10626     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10627     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10628                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10629                                                   Op.getOperand(ArgStart + 1)),
10630                                       Op.getOperand(0)),
10631                    0);
10632   }
10633   default:
10634     break;
10635   }
10636   return SDValue();
10637 }
10638 
10639 // Lower scalar BSWAP64 to xxbrd.
10640 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10641   SDLoc dl(Op);
10642   // MTVSRDD
10643   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10644                    Op.getOperand(0));
10645   // XXBRD
10646   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10647   // MFVSRD
10648   int VectorIndex = 0;
10649   if (Subtarget.isLittleEndian())
10650     VectorIndex = 1;
10651   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10652                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10653   return Op;
10654 }
10655 
10656 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10657 // compared to a value that is atomically loaded (atomic loads zero-extend).
10658 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10659                                                 SelectionDAG &DAG) const {
10660   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10661          "Expecting an atomic compare-and-swap here.");
10662   SDLoc dl(Op);
10663   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10664   EVT MemVT = AtomicNode->getMemoryVT();
10665   if (MemVT.getSizeInBits() >= 32)
10666     return Op;
10667 
10668   SDValue CmpOp = Op.getOperand(2);
10669   // If this is already correctly zero-extended, leave it alone.
10670   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10671   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10672     return Op;
10673 
10674   // Clear the high bits of the compare operand.
10675   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10676   SDValue NewCmpOp =
10677     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10678                 DAG.getConstant(MaskVal, dl, MVT::i32));
10679 
10680   // Replace the existing compare operand with the properly zero-extended one.
10681   SmallVector<SDValue, 4> Ops;
10682   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10683     Ops.push_back(AtomicNode->getOperand(i));
10684   Ops[2] = NewCmpOp;
10685   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10686   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10687   auto NodeTy =
10688     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10689   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10690 }
10691 
10692 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10693                                                  SelectionDAG &DAG) const {
10694   SDLoc dl(Op);
10695   // Create a stack slot that is 16-byte aligned.
10696   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10697   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10698   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10699   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10700 
10701   // Store the input value into Value#0 of the stack slot.
10702   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10703                                MachinePointerInfo());
10704   // Load it out.
10705   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10706 }
10707 
10708 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10709                                                   SelectionDAG &DAG) const {
10710   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10711          "Should only be called for ISD::INSERT_VECTOR_ELT");
10712 
10713   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10714   // We have legal lowering for constant indices but not for variable ones.
10715   if (!C)
10716     return SDValue();
10717 
10718   EVT VT = Op.getValueType();
10719   SDLoc dl(Op);
10720   SDValue V1 = Op.getOperand(0);
10721   SDValue V2 = Op.getOperand(1);
10722   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10723   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10724     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10725     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10726     unsigned InsertAtElement = C->getZExtValue();
10727     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10728     if (Subtarget.isLittleEndian()) {
10729       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10730     }
10731     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10732                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10733   }
10734   return Op;
10735 }
10736 
10737 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10738                                            SelectionDAG &DAG) const {
10739   SDLoc dl(Op);
10740   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10741   SDValue LoadChain = LN->getChain();
10742   SDValue BasePtr = LN->getBasePtr();
10743   EVT VT = Op.getValueType();
10744 
10745   if (VT != MVT::v256i1 && VT != MVT::v512i1)
10746     return Op;
10747 
10748   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10749   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10750   // 2 or 4 vsx registers.
10751   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10752          "Type unsupported without MMA");
10753   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10754          "Type unsupported without paired vector support");
10755   Align Alignment = LN->getAlign();
10756   SmallVector<SDValue, 4> Loads;
10757   SmallVector<SDValue, 4> LoadChains;
10758   unsigned NumVecs = VT.getSizeInBits() / 128;
10759   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10760     SDValue Load =
10761         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10762                     LN->getPointerInfo().getWithOffset(Idx * 16),
10763                     commonAlignment(Alignment, Idx * 16),
10764                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
10765     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10766                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10767     Loads.push_back(Load);
10768     LoadChains.push_back(Load.getValue(1));
10769   }
10770   if (Subtarget.isLittleEndian()) {
10771     std::reverse(Loads.begin(), Loads.end());
10772     std::reverse(LoadChains.begin(), LoadChains.end());
10773   }
10774   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10775   SDValue Value =
10776       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10777                   dl, VT, Loads);
10778   SDValue RetOps[] = {Value, TF};
10779   return DAG.getMergeValues(RetOps, dl);
10780 }
10781 
10782 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10783                                             SelectionDAG &DAG) const {
10784   SDLoc dl(Op);
10785   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10786   SDValue StoreChain = SN->getChain();
10787   SDValue BasePtr = SN->getBasePtr();
10788   SDValue Value = SN->getValue();
10789   EVT StoreVT = Value.getValueType();
10790 
10791   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10792     return Op;
10793 
10794   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10795   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10796   // underlying registers individually.
10797   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10798          "Type unsupported without MMA");
10799   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10800          "Type unsupported without paired vector support");
10801   Align Alignment = SN->getAlign();
10802   SmallVector<SDValue, 4> Stores;
10803   unsigned NumVecs = 2;
10804   if (StoreVT == MVT::v512i1) {
10805     Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10806     NumVecs = 4;
10807   }
10808   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10809     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10810     SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10811                               DAG.getConstant(VecNum, dl, MVT::i64));
10812     SDValue Store =
10813         DAG.getStore(StoreChain, dl, Elt, BasePtr,
10814                      SN->getPointerInfo().getWithOffset(Idx * 16),
10815                      commonAlignment(Alignment, Idx * 16),
10816                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
10817     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10818                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10819     Stores.push_back(Store);
10820   }
10821   SDValue TF = DAG.getTokenFactor(dl, Stores);
10822   return TF;
10823 }
10824 
10825 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10826   SDLoc dl(Op);
10827   if (Op.getValueType() == MVT::v4i32) {
10828     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10829 
10830     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10831     // +16 as shift amt.
10832     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10833     SDValue RHSSwap =   // = vrlw RHS, 16
10834       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10835 
10836     // Shrinkify inputs to v8i16.
10837     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10838     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10839     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10840 
10841     // Low parts multiplied together, generating 32-bit results (we ignore the
10842     // top parts).
10843     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10844                                         LHS, RHS, DAG, dl, MVT::v4i32);
10845 
10846     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10847                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10848     // Shift the high parts up 16 bits.
10849     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10850                               Neg16, DAG, dl);
10851     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10852   } else if (Op.getValueType() == MVT::v16i8) {
10853     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10854     bool isLittleEndian = Subtarget.isLittleEndian();
10855 
10856     // Multiply the even 8-bit parts, producing 16-bit sums.
10857     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10858                                            LHS, RHS, DAG, dl, MVT::v8i16);
10859     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10860 
10861     // Multiply the odd 8-bit parts, producing 16-bit sums.
10862     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10863                                           LHS, RHS, DAG, dl, MVT::v8i16);
10864     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10865 
10866     // Merge the results together.  Because vmuleub and vmuloub are
10867     // instructions with a big-endian bias, we must reverse the
10868     // element numbering and reverse the meaning of "odd" and "even"
10869     // when generating little endian code.
10870     int Ops[16];
10871     for (unsigned i = 0; i != 8; ++i) {
10872       if (isLittleEndian) {
10873         Ops[i*2  ] = 2*i;
10874         Ops[i*2+1] = 2*i+16;
10875       } else {
10876         Ops[i*2  ] = 2*i+1;
10877         Ops[i*2+1] = 2*i+1+16;
10878       }
10879     }
10880     if (isLittleEndian)
10881       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10882     else
10883       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10884   } else {
10885     llvm_unreachable("Unknown mul to lower!");
10886   }
10887 }
10888 
10889 SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
10890 
10891   assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");
10892 
10893   EVT VT = Op.getValueType();
10894   assert(VT.isVector() &&
10895          "Only set vector abs as custom, scalar abs shouldn't reach here!");
10896   assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
10897           VT == MVT::v16i8) &&
10898          "Unexpected vector element type!");
10899   assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
10900          "Current subtarget doesn't support smax v2i64!");
10901 
10902   // For vector abs, it can be lowered to:
10903   // abs x
10904   // ==>
10905   // y = -x
10906   // smax(x, y)
10907 
10908   SDLoc dl(Op);
10909   SDValue X = Op.getOperand(0);
10910   SDValue Zero = DAG.getConstant(0, dl, VT);
10911   SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);
10912 
10913   // SMAX patch https://reviews.llvm.org/D47332
10914   // hasn't landed yet, so use intrinsic first here.
10915   // TODO: Should use SMAX directly once SMAX patch landed
10916   Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
10917   if (VT == MVT::v2i64)
10918     BifID = Intrinsic::ppc_altivec_vmaxsd;
10919   else if (VT == MVT::v8i16)
10920     BifID = Intrinsic::ppc_altivec_vmaxsh;
10921   else if (VT == MVT::v16i8)
10922     BifID = Intrinsic::ppc_altivec_vmaxsb;
10923 
10924   return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
10925 }
10926 
10927 // Custom lowering for fpext vf32 to v2f64
10928 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10929 
10930   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10931          "Should only be called for ISD::FP_EXTEND");
10932 
10933   // FIXME: handle extends from half precision float vectors on P9.
10934   // We only want to custom lower an extend from v2f32 to v2f64.
10935   if (Op.getValueType() != MVT::v2f64 ||
10936       Op.getOperand(0).getValueType() != MVT::v2f32)
10937     return SDValue();
10938 
10939   SDLoc dl(Op);
10940   SDValue Op0 = Op.getOperand(0);
10941 
10942   switch (Op0.getOpcode()) {
10943   default:
10944     return SDValue();
10945   case ISD::EXTRACT_SUBVECTOR: {
10946     assert(Op0.getNumOperands() == 2 &&
10947            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10948            "Node should have 2 operands with second one being a constant!");
10949 
10950     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10951       return SDValue();
10952 
10953     // Custom lower is only done for high or low doubleword.
10954     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10955     if (Idx % 2 != 0)
10956       return SDValue();
10957 
10958     // Since input is v4f32, at this point Idx is either 0 or 2.
10959     // Shift to get the doubleword position we want.
10960     int DWord = Idx >> 1;
10961 
10962     // High and low word positions are different on little endian.
10963     if (Subtarget.isLittleEndian())
10964       DWord ^= 0x1;
10965 
10966     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10967                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10968   }
10969   case ISD::FADD:
10970   case ISD::FMUL:
10971   case ISD::FSUB: {
10972     SDValue NewLoad[2];
10973     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10974       // Ensure both input are loads.
10975       SDValue LdOp = Op0.getOperand(i);
10976       if (LdOp.getOpcode() != ISD::LOAD)
10977         return SDValue();
10978       // Generate new load node.
10979       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10980       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10981       NewLoad[i] = DAG.getMemIntrinsicNode(
10982           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10983           LD->getMemoryVT(), LD->getMemOperand());
10984     }
10985     SDValue NewOp =
10986         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10987                     NewLoad[1], Op0.getNode()->getFlags());
10988     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10989                        DAG.getConstant(0, dl, MVT::i32));
10990   }
10991   case ISD::LOAD: {
10992     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10993     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10994     SDValue NewLd = DAG.getMemIntrinsicNode(
10995         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10996         LD->getMemoryVT(), LD->getMemOperand());
10997     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10998                        DAG.getConstant(0, dl, MVT::i32));
10999   }
11000   }
11001   llvm_unreachable("ERROR:Should return for all cases within swtich.");
11002 }
11003 
11004 /// LowerOperation - Provide custom lowering hooks for some operations.
11005 ///
11006 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11007   switch (Op.getOpcode()) {
11008   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11009   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
11010   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
11011   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
11012   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
11013   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
11014   case ISD::SETCC:              return LowerSETCC(Op, DAG);
11015   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
11016   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
11017 
11018   // Variable argument lowering.
11019   case ISD::VASTART:            return LowerVASTART(Op, DAG);
11020   case ISD::VAARG:              return LowerVAARG(Op, DAG);
11021   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
11022 
11023   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
11024   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11025   case ISD::GET_DYNAMIC_AREA_OFFSET:
11026     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11027 
11028   // Exception handling lowering.
11029   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
11030   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
11031   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
11032 
11033   case ISD::LOAD:               return LowerLOAD(Op, DAG);
11034   case ISD::STORE:              return LowerSTORE(Op, DAG);
11035   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
11036   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
11037   case ISD::STRICT_FP_TO_UINT:
11038   case ISD::STRICT_FP_TO_SINT:
11039   case ISD::FP_TO_UINT:
11040   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11041   case ISD::STRICT_UINT_TO_FP:
11042   case ISD::STRICT_SINT_TO_FP:
11043   case ISD::UINT_TO_FP:
11044   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
11045   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
11046 
11047   // Lower 64-bit shifts.
11048   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
11049   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
11050   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
11051 
11052   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
11053   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
11054 
11055   // Vector-related lowering.
11056   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
11057   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
11058   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11059   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
11060   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
11061   case ISD::MUL:                return LowerMUL(Op, DAG);
11062   case ISD::ABS:                return LowerABS(Op, DAG);
11063   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11064   case ISD::ROTL:               return LowerROTL(Op, DAG);
11065 
11066   // For counter-based loop handling.
11067   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
11068 
11069   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
11070 
11071   // Frame & Return address.
11072   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
11073   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
11074 
11075   case ISD::INTRINSIC_VOID:
11076     return LowerINTRINSIC_VOID(Op, DAG);
11077   case ISD::BSWAP:
11078     return LowerBSWAP(Op, DAG);
11079   case ISD::ATOMIC_CMP_SWAP:
11080     return LowerATOMIC_CMP_SWAP(Op, DAG);
11081   }
11082 }
11083 
11084 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11085                                            SmallVectorImpl<SDValue>&Results,
11086                                            SelectionDAG &DAG) const {
11087   SDLoc dl(N);
11088   switch (N->getOpcode()) {
11089   default:
11090     llvm_unreachable("Do not know how to custom type legalize this operation!");
11091   case ISD::READCYCLECOUNTER: {
11092     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11093     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11094 
11095     Results.push_back(
11096         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11097     Results.push_back(RTB.getValue(2));
11098     break;
11099   }
11100   case ISD::INTRINSIC_W_CHAIN: {
11101     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11102         Intrinsic::loop_decrement)
11103       break;
11104 
11105     assert(N->getValueType(0) == MVT::i1 &&
11106            "Unexpected result type for CTR decrement intrinsic");
11107     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11108                                  N->getValueType(0));
11109     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11110     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11111                                  N->getOperand(1));
11112 
11113     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11114     Results.push_back(NewInt.getValue(1));
11115     break;
11116   }
11117   case ISD::VAARG: {
11118     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11119       return;
11120 
11121     EVT VT = N->getValueType(0);
11122 
11123     if (VT == MVT::i64) {
11124       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11125 
11126       Results.push_back(NewNode);
11127       Results.push_back(NewNode.getValue(1));
11128     }
11129     return;
11130   }
11131   case ISD::STRICT_FP_TO_SINT:
11132   case ISD::STRICT_FP_TO_UINT:
11133   case ISD::FP_TO_SINT:
11134   case ISD::FP_TO_UINT:
11135     // LowerFP_TO_INT() can only handle f32 and f64.
11136     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11137         MVT::ppcf128)
11138       return;
11139     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
11140     return;
11141   case ISD::TRUNCATE: {
11142     if (!N->getValueType(0).isVector())
11143       return;
11144     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11145     if (Lowered)
11146       Results.push_back(Lowered);
11147     return;
11148   }
11149   case ISD::FSHL:
11150   case ISD::FSHR:
11151     // Don't handle funnel shifts here.
11152     return;
11153   case ISD::BITCAST:
11154     // Don't handle bitcast here.
11155     return;
11156   case ISD::FP_EXTEND:
11157     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11158     if (Lowered)
11159       Results.push_back(Lowered);
11160     return;
11161   }
11162 }
11163 
11164 //===----------------------------------------------------------------------===//
11165 //  Other Lowering Code
11166 //===----------------------------------------------------------------------===//
11167 
11168 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
11169   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11170   Function *Func = Intrinsic::getDeclaration(M, Id);
11171   return Builder.CreateCall(Func, {});
11172 }
11173 
11174 // The mappings for emitLeading/TrailingFence is taken from
11175 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11176 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
11177                                                  Instruction *Inst,
11178                                                  AtomicOrdering Ord) const {
11179   if (Ord == AtomicOrdering::SequentiallyConsistent)
11180     return callIntrinsic(Builder, Intrinsic::ppc_sync);
11181   if (isReleaseOrStronger(Ord))
11182     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11183   return nullptr;
11184 }
11185 
11186 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
11187                                                   Instruction *Inst,
11188                                                   AtomicOrdering Ord) const {
11189   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11190     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11191     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11192     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11193     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11194       return Builder.CreateCall(
11195           Intrinsic::getDeclaration(
11196               Builder.GetInsertBlock()->getParent()->getParent(),
11197               Intrinsic::ppc_cfence, {Inst->getType()}),
11198           {Inst});
11199     // FIXME: Can use isync for rmw operation.
11200     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11201   }
11202   return nullptr;
11203 }
11204 
11205 MachineBasicBlock *
11206 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11207                                     unsigned AtomicSize,
11208                                     unsigned BinOpcode,
11209                                     unsigned CmpOpcode,
11210                                     unsigned CmpPred) const {
11211   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11212   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11213 
11214   auto LoadMnemonic = PPC::LDARX;
11215   auto StoreMnemonic = PPC::STDCX;
11216   switch (AtomicSize) {
11217   default:
11218     llvm_unreachable("Unexpected size of atomic entity");
11219   case 1:
11220     LoadMnemonic = PPC::LBARX;
11221     StoreMnemonic = PPC::STBCX;
11222     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11223     break;
11224   case 2:
11225     LoadMnemonic = PPC::LHARX;
11226     StoreMnemonic = PPC::STHCX;
11227     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11228     break;
11229   case 4:
11230     LoadMnemonic = PPC::LWARX;
11231     StoreMnemonic = PPC::STWCX;
11232     break;
11233   case 8:
11234     LoadMnemonic = PPC::LDARX;
11235     StoreMnemonic = PPC::STDCX;
11236     break;
11237   }
11238 
11239   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11240   MachineFunction *F = BB->getParent();
11241   MachineFunction::iterator It = ++BB->getIterator();
11242 
11243   Register dest = MI.getOperand(0).getReg();
11244   Register ptrA = MI.getOperand(1).getReg();
11245   Register ptrB = MI.getOperand(2).getReg();
11246   Register incr = MI.getOperand(3).getReg();
11247   DebugLoc dl = MI.getDebugLoc();
11248 
11249   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11250   MachineBasicBlock *loop2MBB =
11251     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11252   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11253   F->insert(It, loopMBB);
11254   if (CmpOpcode)
11255     F->insert(It, loop2MBB);
11256   F->insert(It, exitMBB);
11257   exitMBB->splice(exitMBB->begin(), BB,
11258                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11259   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11260 
11261   MachineRegisterInfo &RegInfo = F->getRegInfo();
11262   Register TmpReg = (!BinOpcode) ? incr :
11263     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11264                                            : &PPC::GPRCRegClass);
11265 
11266   //  thisMBB:
11267   //   ...
11268   //   fallthrough --> loopMBB
11269   BB->addSuccessor(loopMBB);
11270 
11271   //  loopMBB:
11272   //   l[wd]arx dest, ptr
11273   //   add r0, dest, incr
11274   //   st[wd]cx. r0, ptr
11275   //   bne- loopMBB
11276   //   fallthrough --> exitMBB
11277 
11278   // For max/min...
11279   //  loopMBB:
11280   //   l[wd]arx dest, ptr
11281   //   cmpl?[wd] incr, dest
11282   //   bgt exitMBB
11283   //  loop2MBB:
11284   //   st[wd]cx. dest, ptr
11285   //   bne- loopMBB
11286   //   fallthrough --> exitMBB
11287 
11288   BB = loopMBB;
11289   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11290     .addReg(ptrA).addReg(ptrB);
11291   if (BinOpcode)
11292     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11293   if (CmpOpcode) {
11294     // Signed comparisons of byte or halfword values must be sign-extended.
11295     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11296       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11297       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11298               ExtReg).addReg(dest);
11299       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11300         .addReg(incr).addReg(ExtReg);
11301     } else
11302       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11303         .addReg(incr).addReg(dest);
11304 
11305     BuildMI(BB, dl, TII->get(PPC::BCC))
11306       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11307     BB->addSuccessor(loop2MBB);
11308     BB->addSuccessor(exitMBB);
11309     BB = loop2MBB;
11310   }
11311   BuildMI(BB, dl, TII->get(StoreMnemonic))
11312     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11313   BuildMI(BB, dl, TII->get(PPC::BCC))
11314     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11315   BB->addSuccessor(loopMBB);
11316   BB->addSuccessor(exitMBB);
11317 
11318   //  exitMBB:
11319   //   ...
11320   BB = exitMBB;
11321   return BB;
11322 }
11323 
11324 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11325     MachineInstr &MI, MachineBasicBlock *BB,
11326     bool is8bit, // operation
11327     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11328   // If we support part-word atomic mnemonics, just use them
11329   if (Subtarget.hasPartwordAtomics())
11330     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11331                             CmpPred);
11332 
11333   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11334   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11335   // In 64 bit mode we have to use 64 bits for addresses, even though the
11336   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
11337   // registers without caring whether they're 32 or 64, but here we're
11338   // doing actual arithmetic on the addresses.
11339   bool is64bit = Subtarget.isPPC64();
11340   bool isLittleEndian = Subtarget.isLittleEndian();
11341   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11342 
11343   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11344   MachineFunction *F = BB->getParent();
11345   MachineFunction::iterator It = ++BB->getIterator();
11346 
11347   Register dest = MI.getOperand(0).getReg();
11348   Register ptrA = MI.getOperand(1).getReg();
11349   Register ptrB = MI.getOperand(2).getReg();
11350   Register incr = MI.getOperand(3).getReg();
11351   DebugLoc dl = MI.getDebugLoc();
11352 
11353   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11354   MachineBasicBlock *loop2MBB =
11355       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11356   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11357   F->insert(It, loopMBB);
11358   if (CmpOpcode)
11359     F->insert(It, loop2MBB);
11360   F->insert(It, exitMBB);
11361   exitMBB->splice(exitMBB->begin(), BB,
11362                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11363   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11364 
11365   MachineRegisterInfo &RegInfo = F->getRegInfo();
11366   const TargetRegisterClass *RC =
11367       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11368   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11369 
11370   Register PtrReg = RegInfo.createVirtualRegister(RC);
11371   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11372   Register ShiftReg =
11373       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11374   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11375   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11376   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11377   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11378   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11379   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11380   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11381   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11382   Register Ptr1Reg;
11383   Register TmpReg =
11384       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11385 
11386   //  thisMBB:
11387   //   ...
11388   //   fallthrough --> loopMBB
11389   BB->addSuccessor(loopMBB);
11390 
11391   // The 4-byte load must be aligned, while a char or short may be
11392   // anywhere in the word.  Hence all this nasty bookkeeping code.
11393   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11394   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11395   //   xori shift, shift1, 24 [16]
11396   //   rlwinm ptr, ptr1, 0, 0, 29
11397   //   slw incr2, incr, shift
11398   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11399   //   slw mask, mask2, shift
11400   //  loopMBB:
11401   //   lwarx tmpDest, ptr
11402   //   add tmp, tmpDest, incr2
11403   //   andc tmp2, tmpDest, mask
11404   //   and tmp3, tmp, mask
11405   //   or tmp4, tmp3, tmp2
11406   //   stwcx. tmp4, ptr
11407   //   bne- loopMBB
11408   //   fallthrough --> exitMBB
11409   //   srw dest, tmpDest, shift
11410   if (ptrA != ZeroReg) {
11411     Ptr1Reg = RegInfo.createVirtualRegister(RC);
11412     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11413         .addReg(ptrA)
11414         .addReg(ptrB);
11415   } else {
11416     Ptr1Reg = ptrB;
11417   }
11418   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11419   // mode.
11420   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11421       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11422       .addImm(3)
11423       .addImm(27)
11424       .addImm(is8bit ? 28 : 27);
11425   if (!isLittleEndian)
11426     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11427         .addReg(Shift1Reg)
11428         .addImm(is8bit ? 24 : 16);
11429   if (is64bit)
11430     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11431         .addReg(Ptr1Reg)
11432         .addImm(0)
11433         .addImm(61);
11434   else
11435     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11436         .addReg(Ptr1Reg)
11437         .addImm(0)
11438         .addImm(0)
11439         .addImm(29);
11440   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11441   if (is8bit)
11442     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11443   else {
11444     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11445     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11446         .addReg(Mask3Reg)
11447         .addImm(65535);
11448   }
11449   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11450       .addReg(Mask2Reg)
11451       .addReg(ShiftReg);
11452 
11453   BB = loopMBB;
11454   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11455       .addReg(ZeroReg)
11456       .addReg(PtrReg);
11457   if (BinOpcode)
11458     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11459         .addReg(Incr2Reg)
11460         .addReg(TmpDestReg);
11461   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11462       .addReg(TmpDestReg)
11463       .addReg(MaskReg);
11464   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11465   if (CmpOpcode) {
11466     // For unsigned comparisons, we can directly compare the shifted values.
11467     // For signed comparisons we shift and sign extend.
11468     Register SReg = RegInfo.createVirtualRegister(GPRC);
11469     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11470         .addReg(TmpDestReg)
11471         .addReg(MaskReg);
11472     unsigned ValueReg = SReg;
11473     unsigned CmpReg = Incr2Reg;
11474     if (CmpOpcode == PPC::CMPW) {
11475       ValueReg = RegInfo.createVirtualRegister(GPRC);
11476       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11477           .addReg(SReg)
11478           .addReg(ShiftReg);
11479       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11480       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11481           .addReg(ValueReg);
11482       ValueReg = ValueSReg;
11483       CmpReg = incr;
11484     }
11485     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11486         .addReg(CmpReg)
11487         .addReg(ValueReg);
11488     BuildMI(BB, dl, TII->get(PPC::BCC))
11489         .addImm(CmpPred)
11490         .addReg(PPC::CR0)
11491         .addMBB(exitMBB);
11492     BB->addSuccessor(loop2MBB);
11493     BB->addSuccessor(exitMBB);
11494     BB = loop2MBB;
11495   }
11496   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11497   BuildMI(BB, dl, TII->get(PPC::STWCX))
11498       .addReg(Tmp4Reg)
11499       .addReg(ZeroReg)
11500       .addReg(PtrReg);
11501   BuildMI(BB, dl, TII->get(PPC::BCC))
11502       .addImm(PPC::PRED_NE)
11503       .addReg(PPC::CR0)
11504       .addMBB(loopMBB);
11505   BB->addSuccessor(loopMBB);
11506   BB->addSuccessor(exitMBB);
11507 
11508   //  exitMBB:
11509   //   ...
11510   BB = exitMBB;
11511   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11512       .addReg(TmpDestReg)
11513       .addReg(ShiftReg);
11514   return BB;
11515 }
11516 
11517 llvm::MachineBasicBlock *
11518 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
11519                                     MachineBasicBlock *MBB) const {
11520   DebugLoc DL = MI.getDebugLoc();
11521   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11522   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11523 
11524   MachineFunction *MF = MBB->getParent();
11525   MachineRegisterInfo &MRI = MF->getRegInfo();
11526 
11527   const BasicBlock *BB = MBB->getBasicBlock();
11528   MachineFunction::iterator I = ++MBB->getIterator();
11529 
11530   Register DstReg = MI.getOperand(0).getReg();
11531   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11532   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
11533   Register mainDstReg = MRI.createVirtualRegister(RC);
11534   Register restoreDstReg = MRI.createVirtualRegister(RC);
11535 
11536   MVT PVT = getPointerTy(MF->getDataLayout());
11537   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11538          "Invalid Pointer Size!");
11539   // For v = setjmp(buf), we generate
11540   //
11541   // thisMBB:
11542   //  SjLjSetup mainMBB
11543   //  bl mainMBB
11544   //  v_restore = 1
11545   //  b sinkMBB
11546   //
11547   // mainMBB:
11548   //  buf[LabelOffset] = LR
11549   //  v_main = 0
11550   //
11551   // sinkMBB:
11552   //  v = phi(main, restore)
11553   //
11554 
11555   MachineBasicBlock *thisMBB = MBB;
11556   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
11557   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
11558   MF->insert(I, mainMBB);
11559   MF->insert(I, sinkMBB);
11560 
11561   MachineInstrBuilder MIB;
11562 
11563   // Transfer the remainder of BB and its successor edges to sinkMBB.
11564   sinkMBB->splice(sinkMBB->begin(), MBB,
11565                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11566   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
11567 
11568   // Note that the structure of the jmp_buf used here is not compatible
11569   // with that used by libc, and is not designed to be. Specifically, it
11570   // stores only those 'reserved' registers that LLVM does not otherwise
11571   // understand how to spill. Also, by convention, by the time this
11572   // intrinsic is called, Clang has already stored the frame address in the
11573   // first slot of the buffer and stack address in the third. Following the
11574   // X86 target code, we'll store the jump address in the second slot. We also
11575   // need to save the TOC pointer (R2) to handle jumps between shared
11576   // libraries, and that will be stored in the fourth slot. The thread
11577   // identifier (R13) is not affected.
11578 
11579   // thisMBB:
11580   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11581   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11582   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11583 
11584   // Prepare IP either in reg.
11585   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11586   Register LabelReg = MRI.createVirtualRegister(PtrRC);
11587   Register BufReg = MI.getOperand(1).getReg();
11588 
11589   if (Subtarget.is64BitELFABI()) {
11590     setUsesTOCBasePtr(*MBB->getParent());
11591     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11592               .addReg(PPC::X2)
11593               .addImm(TOCOffset)
11594               .addReg(BufReg)
11595               .cloneMemRefs(MI);
11596   }
11597 
11598   // Naked functions never have a base pointer, and so we use r1. For all
11599   // other functions, this decision must be delayed until during PEI.
11600   unsigned BaseReg;
11601   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11602     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11603   else
11604     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11605 
11606   MIB = BuildMI(*thisMBB, MI, DL,
11607                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11608             .addReg(BaseReg)
11609             .addImm(BPOffset)
11610             .addReg(BufReg)
11611             .cloneMemRefs(MI);
11612 
11613   // Setup
11614   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11615   MIB.addRegMask(TRI->getNoPreservedMask());
11616 
11617   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11618 
11619   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11620           .addMBB(mainMBB);
11621   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11622 
11623   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11624   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11625 
11626   // mainMBB:
11627   //  mainDstReg = 0
11628   MIB =
11629       BuildMI(mainMBB, DL,
11630               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11631 
11632   // Store IP
11633   if (Subtarget.isPPC64()) {
11634     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11635             .addReg(LabelReg)
11636             .addImm(LabelOffset)
11637             .addReg(BufReg);
11638   } else {
11639     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11640             .addReg(LabelReg)
11641             .addImm(LabelOffset)
11642             .addReg(BufReg);
11643   }
11644   MIB.cloneMemRefs(MI);
11645 
11646   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11647   mainMBB->addSuccessor(sinkMBB);
11648 
11649   // sinkMBB:
11650   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11651           TII->get(PPC::PHI), DstReg)
11652     .addReg(mainDstReg).addMBB(mainMBB)
11653     .addReg(restoreDstReg).addMBB(thisMBB);
11654 
11655   MI.eraseFromParent();
11656   return sinkMBB;
11657 }
11658 
11659 MachineBasicBlock *
11660 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11661                                      MachineBasicBlock *MBB) const {
11662   DebugLoc DL = MI.getDebugLoc();
11663   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11664 
11665   MachineFunction *MF = MBB->getParent();
11666   MachineRegisterInfo &MRI = MF->getRegInfo();
11667 
11668   MVT PVT = getPointerTy(MF->getDataLayout());
11669   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11670          "Invalid Pointer Size!");
11671 
11672   const TargetRegisterClass *RC =
11673     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11674   Register Tmp = MRI.createVirtualRegister(RC);
11675   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11676   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11677   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11678   unsigned BP =
11679       (PVT == MVT::i64)
11680           ? PPC::X30
11681           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11682                                                               : PPC::R30);
11683 
11684   MachineInstrBuilder MIB;
11685 
11686   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11687   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11688   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11689   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11690 
11691   Register BufReg = MI.getOperand(0).getReg();
11692 
11693   // Reload FP (the jumped-to function may not have had a
11694   // frame pointer, and if so, then its r31 will be restored
11695   // as necessary).
11696   if (PVT == MVT::i64) {
11697     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11698             .addImm(0)
11699             .addReg(BufReg);
11700   } else {
11701     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11702             .addImm(0)
11703             .addReg(BufReg);
11704   }
11705   MIB.cloneMemRefs(MI);
11706 
11707   // Reload IP
11708   if (PVT == MVT::i64) {
11709     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11710             .addImm(LabelOffset)
11711             .addReg(BufReg);
11712   } else {
11713     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11714             .addImm(LabelOffset)
11715             .addReg(BufReg);
11716   }
11717   MIB.cloneMemRefs(MI);
11718 
11719   // Reload SP
11720   if (PVT == MVT::i64) {
11721     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11722             .addImm(SPOffset)
11723             .addReg(BufReg);
11724   } else {
11725     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11726             .addImm(SPOffset)
11727             .addReg(BufReg);
11728   }
11729   MIB.cloneMemRefs(MI);
11730 
11731   // Reload BP
11732   if (PVT == MVT::i64) {
11733     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11734             .addImm(BPOffset)
11735             .addReg(BufReg);
11736   } else {
11737     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11738             .addImm(BPOffset)
11739             .addReg(BufReg);
11740   }
11741   MIB.cloneMemRefs(MI);
11742 
11743   // Reload TOC
11744   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11745     setUsesTOCBasePtr(*MBB->getParent());
11746     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11747               .addImm(TOCOffset)
11748               .addReg(BufReg)
11749               .cloneMemRefs(MI);
11750   }
11751 
11752   // Jump
11753   BuildMI(*MBB, MI, DL,
11754           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11755   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11756 
11757   MI.eraseFromParent();
11758   return MBB;
11759 }
11760 
11761 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11762   // If the function specifically requests inline stack probes, emit them.
11763   if (MF.getFunction().hasFnAttribute("probe-stack"))
11764     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11765            "inline-asm";
11766   return false;
11767 }
11768 
11769 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11770   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11771   unsigned StackAlign = TFI->getStackAlignment();
11772   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11773          "Unexpected stack alignment");
11774   // The default stack probe size is 4096 if the function has no
11775   // stack-probe-size attribute.
11776   unsigned StackProbeSize = 4096;
11777   const Function &Fn = MF.getFunction();
11778   if (Fn.hasFnAttribute("stack-probe-size"))
11779     Fn.getFnAttribute("stack-probe-size")
11780         .getValueAsString()
11781         .getAsInteger(0, StackProbeSize);
11782   // Round down to the stack alignment.
11783   StackProbeSize &= ~(StackAlign - 1);
11784   return StackProbeSize ? StackProbeSize : StackAlign;
11785 }
11786 
11787 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
11788 // into three phases. In the first phase, it uses pseudo instruction
11789 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11790 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11791 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
11792 // MaxCallFrameSize so that it can calculate correct data area pointer.
11793 MachineBasicBlock *
11794 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11795                                     MachineBasicBlock *MBB) const {
11796   const bool isPPC64 = Subtarget.isPPC64();
11797   MachineFunction *MF = MBB->getParent();
11798   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11799   DebugLoc DL = MI.getDebugLoc();
11800   const unsigned ProbeSize = getStackProbeSize(*MF);
11801   const BasicBlock *ProbedBB = MBB->getBasicBlock();
11802   MachineRegisterInfo &MRI = MF->getRegInfo();
11803   // The CFG of probing stack looks as
11804   //         +-----+
11805   //         | MBB |
11806   //         +--+--+
11807   //            |
11808   //       +----v----+
11809   //  +--->+ TestMBB +---+
11810   //  |    +----+----+   |
11811   //  |         |        |
11812   //  |   +-----v----+   |
11813   //  +---+ BlockMBB |   |
11814   //      +----------+   |
11815   //                     |
11816   //       +---------+   |
11817   //       | TailMBB +<--+
11818   //       +---------+
11819   // In MBB, calculate previous frame pointer and final stack pointer.
11820   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11821   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11822   // TailMBB is spliced via \p MI.
11823   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11824   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11825   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11826 
11827   MachineFunction::iterator MBBIter = ++MBB->getIterator();
11828   MF->insert(MBBIter, TestMBB);
11829   MF->insert(MBBIter, BlockMBB);
11830   MF->insert(MBBIter, TailMBB);
11831 
11832   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11833   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11834 
11835   Register DstReg = MI.getOperand(0).getReg();
11836   Register NegSizeReg = MI.getOperand(1).getReg();
11837   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11838   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11839   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11840   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11841 
11842   // Since value of NegSizeReg might be realigned in prologepilog, insert a
11843   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
11844   // NegSize.
11845   unsigned ProbeOpc;
11846   if (!MRI.hasOneNonDBGUse(NegSizeReg))
11847     ProbeOpc =
11848         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11849   else
11850     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
11851     // and NegSizeReg will be allocated in the same phyreg to avoid
11852     // redundant copy when NegSizeReg has only one use which is current MI and
11853     // will be replaced by PREPARE_PROBED_ALLOCA then.
11854     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11855                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11856   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11857       .addDef(ActualNegSizeReg)
11858       .addReg(NegSizeReg)
11859       .add(MI.getOperand(2))
11860       .add(MI.getOperand(3));
11861 
11862   // Calculate final stack pointer, which equals to SP + ActualNegSize.
11863   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11864           FinalStackPtr)
11865       .addReg(SPReg)
11866       .addReg(ActualNegSizeReg);
11867 
11868   // Materialize a scratch register for update.
11869   int64_t NegProbeSize = -(int64_t)ProbeSize;
11870   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11871   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11872   if (!isInt<16>(NegProbeSize)) {
11873     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11874     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11875         .addImm(NegProbeSize >> 16);
11876     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11877             ScratchReg)
11878         .addReg(TempReg)
11879         .addImm(NegProbeSize & 0xFFFF);
11880   } else
11881     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11882         .addImm(NegProbeSize);
11883 
11884   {
11885     // Probing leading residual part.
11886     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11887     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11888         .addReg(ActualNegSizeReg)
11889         .addReg(ScratchReg);
11890     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11891     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11892         .addReg(Div)
11893         .addReg(ScratchReg);
11894     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11895     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11896         .addReg(Mul)
11897         .addReg(ActualNegSizeReg);
11898     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11899         .addReg(FramePointer)
11900         .addReg(SPReg)
11901         .addReg(NegMod);
11902   }
11903 
11904   {
11905     // Remaining part should be multiple of ProbeSize.
11906     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11907     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11908         .addReg(SPReg)
11909         .addReg(FinalStackPtr);
11910     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11911         .addImm(PPC::PRED_EQ)
11912         .addReg(CmpResult)
11913         .addMBB(TailMBB);
11914     TestMBB->addSuccessor(BlockMBB);
11915     TestMBB->addSuccessor(TailMBB);
11916   }
11917 
11918   {
11919     // Touch the block.
11920     // |P...|P...|P...
11921     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11922         .addReg(FramePointer)
11923         .addReg(SPReg)
11924         .addReg(ScratchReg);
11925     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11926     BlockMBB->addSuccessor(TestMBB);
11927   }
11928 
11929   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
11930   // DYNAREAOFFSET pseudo instruction to get the future result.
11931   Register MaxCallFrameSizeReg =
11932       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11933   BuildMI(TailMBB, DL,
11934           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11935           MaxCallFrameSizeReg)
11936       .add(MI.getOperand(2))
11937       .add(MI.getOperand(3));
11938   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11939       .addReg(SPReg)
11940       .addReg(MaxCallFrameSizeReg);
11941 
11942   // Splice instructions after MI to TailMBB.
11943   TailMBB->splice(TailMBB->end(), MBB,
11944                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11945   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11946   MBB->addSuccessor(TestMBB);
11947 
11948   // Delete the pseudo instruction.
11949   MI.eraseFromParent();
11950 
11951   ++NumDynamicAllocaProbed;
11952   return TailMBB;
11953 }
11954 
11955 MachineBasicBlock *
11956 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11957                                                MachineBasicBlock *BB) const {
11958   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11959       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11960     if (Subtarget.is64BitELFABI() &&
11961         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11962         !Subtarget.isUsingPCRelativeCalls()) {
11963       // Call lowering should have added an r2 operand to indicate a dependence
11964       // on the TOC base pointer value. It can't however, because there is no
11965       // way to mark the dependence as implicit there, and so the stackmap code
11966       // will confuse it with a regular operand. Instead, add the dependence
11967       // here.
11968       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11969     }
11970 
11971     return emitPatchPoint(MI, BB);
11972   }
11973 
11974   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11975       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11976     return emitEHSjLjSetJmp(MI, BB);
11977   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11978              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11979     return emitEHSjLjLongJmp(MI, BB);
11980   }
11981 
11982   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11983 
11984   // To "insert" these instructions we actually have to insert their
11985   // control-flow patterns.
11986   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11987   MachineFunction::iterator It = ++BB->getIterator();
11988 
11989   MachineFunction *F = BB->getParent();
11990 
11991   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11992       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11993       MI.getOpcode() == PPC::SELECT_I8) {
11994     SmallVector<MachineOperand, 2> Cond;
11995     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11996         MI.getOpcode() == PPC::SELECT_CC_I8)
11997       Cond.push_back(MI.getOperand(4));
11998     else
11999       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12000     Cond.push_back(MI.getOperand(1));
12001 
12002     DebugLoc dl = MI.getDebugLoc();
12003     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12004                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12005   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12006              MI.getOpcode() == PPC::SELECT_CC_F8 ||
12007              MI.getOpcode() == PPC::SELECT_CC_F16 ||
12008              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12009              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12010              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12011              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12012              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12013              MI.getOpcode() == PPC::SELECT_CC_SPE ||
12014              MI.getOpcode() == PPC::SELECT_F4 ||
12015              MI.getOpcode() == PPC::SELECT_F8 ||
12016              MI.getOpcode() == PPC::SELECT_F16 ||
12017              MI.getOpcode() == PPC::SELECT_SPE ||
12018              MI.getOpcode() == PPC::SELECT_SPE4 ||
12019              MI.getOpcode() == PPC::SELECT_VRRC ||
12020              MI.getOpcode() == PPC::SELECT_VSFRC ||
12021              MI.getOpcode() == PPC::SELECT_VSSRC ||
12022              MI.getOpcode() == PPC::SELECT_VSRC) {
12023     // The incoming instruction knows the destination vreg to set, the
12024     // condition code register to branch on, the true/false values to
12025     // select between, and a branch opcode to use.
12026 
12027     //  thisMBB:
12028     //  ...
12029     //   TrueVal = ...
12030     //   cmpTY ccX, r1, r2
12031     //   bCC copy1MBB
12032     //   fallthrough --> copy0MBB
12033     MachineBasicBlock *thisMBB = BB;
12034     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12035     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12036     DebugLoc dl = MI.getDebugLoc();
12037     F->insert(It, copy0MBB);
12038     F->insert(It, sinkMBB);
12039 
12040     // Transfer the remainder of BB and its successor edges to sinkMBB.
12041     sinkMBB->splice(sinkMBB->begin(), BB,
12042                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12043     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12044 
12045     // Next, add the true and fallthrough blocks as its successors.
12046     BB->addSuccessor(copy0MBB);
12047     BB->addSuccessor(sinkMBB);
12048 
12049     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12050         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12051         MI.getOpcode() == PPC::SELECT_F16 ||
12052         MI.getOpcode() == PPC::SELECT_SPE4 ||
12053         MI.getOpcode() == PPC::SELECT_SPE ||
12054         MI.getOpcode() == PPC::SELECT_VRRC ||
12055         MI.getOpcode() == PPC::SELECT_VSFRC ||
12056         MI.getOpcode() == PPC::SELECT_VSSRC ||
12057         MI.getOpcode() == PPC::SELECT_VSRC) {
12058       BuildMI(BB, dl, TII->get(PPC::BC))
12059           .addReg(MI.getOperand(1).getReg())
12060           .addMBB(sinkMBB);
12061     } else {
12062       unsigned SelectPred = MI.getOperand(4).getImm();
12063       BuildMI(BB, dl, TII->get(PPC::BCC))
12064           .addImm(SelectPred)
12065           .addReg(MI.getOperand(1).getReg())
12066           .addMBB(sinkMBB);
12067     }
12068 
12069     //  copy0MBB:
12070     //   %FalseValue = ...
12071     //   # fallthrough to sinkMBB
12072     BB = copy0MBB;
12073 
12074     // Update machine-CFG edges
12075     BB->addSuccessor(sinkMBB);
12076 
12077     //  sinkMBB:
12078     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12079     //  ...
12080     BB = sinkMBB;
12081     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12082         .addReg(MI.getOperand(3).getReg())
12083         .addMBB(copy0MBB)
12084         .addReg(MI.getOperand(2).getReg())
12085         .addMBB(thisMBB);
12086   } else if (MI.getOpcode() == PPC::ReadTB) {
12087     // To read the 64-bit time-base register on a 32-bit target, we read the
12088     // two halves. Should the counter have wrapped while it was being read, we
12089     // need to try again.
12090     // ...
12091     // readLoop:
12092     // mfspr Rx,TBU # load from TBU
12093     // mfspr Ry,TB  # load from TB
12094     // mfspr Rz,TBU # load from TBU
12095     // cmpw crX,Rx,Rz # check if 'old'='new'
12096     // bne readLoop   # branch if they're not equal
12097     // ...
12098 
12099     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12100     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12101     DebugLoc dl = MI.getDebugLoc();
12102     F->insert(It, readMBB);
12103     F->insert(It, sinkMBB);
12104 
12105     // Transfer the remainder of BB and its successor edges to sinkMBB.
12106     sinkMBB->splice(sinkMBB->begin(), BB,
12107                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12108     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12109 
12110     BB->addSuccessor(readMBB);
12111     BB = readMBB;
12112 
12113     MachineRegisterInfo &RegInfo = F->getRegInfo();
12114     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12115     Register LoReg = MI.getOperand(0).getReg();
12116     Register HiReg = MI.getOperand(1).getReg();
12117 
12118     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12119     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12120     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12121 
12122     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12123 
12124     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12125         .addReg(HiReg)
12126         .addReg(ReadAgainReg);
12127     BuildMI(BB, dl, TII->get(PPC::BCC))
12128         .addImm(PPC::PRED_NE)
12129         .addReg(CmpReg)
12130         .addMBB(readMBB);
12131 
12132     BB->addSuccessor(readMBB);
12133     BB->addSuccessor(sinkMBB);
12134   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12135     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12136   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12137     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12138   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12139     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12140   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12141     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12142 
12143   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12144     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12145   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12146     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12147   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12148     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12149   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12150     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12151 
12152   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12153     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12154   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12155     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12156   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12157     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12158   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12159     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12160 
12161   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12162     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12163   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12164     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12165   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12166     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12167   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12168     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12169 
12170   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12171     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12172   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12173     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12174   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12175     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12176   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12177     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12178 
12179   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12180     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12181   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12182     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12183   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12184     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12185   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12186     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12187 
12188   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12189     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12190   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12191     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12192   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12193     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12194   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12195     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12196 
12197   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12198     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12199   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12200     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12201   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12202     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12203   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12204     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12205 
12206   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12207     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12208   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12209     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12210   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12211     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12212   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12213     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12214 
12215   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12216     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12217   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12218     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12219   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12220     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12221   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12222     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12223 
12224   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12225     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12226   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12227     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12228   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12229     BB = EmitAtomicBinary(MI, BB, 4, 0);
12230   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12231     BB = EmitAtomicBinary(MI, BB, 8, 0);
12232   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12233            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12234            (Subtarget.hasPartwordAtomics() &&
12235             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12236            (Subtarget.hasPartwordAtomics() &&
12237             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12238     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12239 
12240     auto LoadMnemonic = PPC::LDARX;
12241     auto StoreMnemonic = PPC::STDCX;
12242     switch (MI.getOpcode()) {
12243     default:
12244       llvm_unreachable("Compare and swap of unknown size");
12245     case PPC::ATOMIC_CMP_SWAP_I8:
12246       LoadMnemonic = PPC::LBARX;
12247       StoreMnemonic = PPC::STBCX;
12248       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12249       break;
12250     case PPC::ATOMIC_CMP_SWAP_I16:
12251       LoadMnemonic = PPC::LHARX;
12252       StoreMnemonic = PPC::STHCX;
12253       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12254       break;
12255     case PPC::ATOMIC_CMP_SWAP_I32:
12256       LoadMnemonic = PPC::LWARX;
12257       StoreMnemonic = PPC::STWCX;
12258       break;
12259     case PPC::ATOMIC_CMP_SWAP_I64:
12260       LoadMnemonic = PPC::LDARX;
12261       StoreMnemonic = PPC::STDCX;
12262       break;
12263     }
12264     Register dest = MI.getOperand(0).getReg();
12265     Register ptrA = MI.getOperand(1).getReg();
12266     Register ptrB = MI.getOperand(2).getReg();
12267     Register oldval = MI.getOperand(3).getReg();
12268     Register newval = MI.getOperand(4).getReg();
12269     DebugLoc dl = MI.getDebugLoc();
12270 
12271     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12272     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12273     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12274     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12275     F->insert(It, loop1MBB);
12276     F->insert(It, loop2MBB);
12277     F->insert(It, midMBB);
12278     F->insert(It, exitMBB);
12279     exitMBB->splice(exitMBB->begin(), BB,
12280                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12281     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12282 
12283     //  thisMBB:
12284     //   ...
12285     //   fallthrough --> loopMBB
12286     BB->addSuccessor(loop1MBB);
12287 
12288     // loop1MBB:
12289     //   l[bhwd]arx dest, ptr
12290     //   cmp[wd] dest, oldval
12291     //   bne- midMBB
12292     // loop2MBB:
12293     //   st[bhwd]cx. newval, ptr
12294     //   bne- loopMBB
12295     //   b exitBB
12296     // midMBB:
12297     //   st[bhwd]cx. dest, ptr
12298     // exitBB:
12299     BB = loop1MBB;
12300     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12301     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12302         .addReg(oldval)
12303         .addReg(dest);
12304     BuildMI(BB, dl, TII->get(PPC::BCC))
12305         .addImm(PPC::PRED_NE)
12306         .addReg(PPC::CR0)
12307         .addMBB(midMBB);
12308     BB->addSuccessor(loop2MBB);
12309     BB->addSuccessor(midMBB);
12310 
12311     BB = loop2MBB;
12312     BuildMI(BB, dl, TII->get(StoreMnemonic))
12313         .addReg(newval)
12314         .addReg(ptrA)
12315         .addReg(ptrB);
12316     BuildMI(BB, dl, TII->get(PPC::BCC))
12317         .addImm(PPC::PRED_NE)
12318         .addReg(PPC::CR0)
12319         .addMBB(loop1MBB);
12320     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12321     BB->addSuccessor(loop1MBB);
12322     BB->addSuccessor(exitMBB);
12323 
12324     BB = midMBB;
12325     BuildMI(BB, dl, TII->get(StoreMnemonic))
12326         .addReg(dest)
12327         .addReg(ptrA)
12328         .addReg(ptrB);
12329     BB->addSuccessor(exitMBB);
12330 
12331     //  exitMBB:
12332     //   ...
12333     BB = exitMBB;
12334   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12335              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12336     // We must use 64-bit registers for addresses when targeting 64-bit,
12337     // since we're actually doing arithmetic on them.  Other registers
12338     // can be 32-bit.
12339     bool is64bit = Subtarget.isPPC64();
12340     bool isLittleEndian = Subtarget.isLittleEndian();
12341     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12342 
12343     Register dest = MI.getOperand(0).getReg();
12344     Register ptrA = MI.getOperand(1).getReg();
12345     Register ptrB = MI.getOperand(2).getReg();
12346     Register oldval = MI.getOperand(3).getReg();
12347     Register newval = MI.getOperand(4).getReg();
12348     DebugLoc dl = MI.getDebugLoc();
12349 
12350     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12351     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12352     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12353     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12354     F->insert(It, loop1MBB);
12355     F->insert(It, loop2MBB);
12356     F->insert(It, midMBB);
12357     F->insert(It, exitMBB);
12358     exitMBB->splice(exitMBB->begin(), BB,
12359                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12360     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12361 
12362     MachineRegisterInfo &RegInfo = F->getRegInfo();
12363     const TargetRegisterClass *RC =
12364         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12365     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12366 
12367     Register PtrReg = RegInfo.createVirtualRegister(RC);
12368     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12369     Register ShiftReg =
12370         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12371     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12372     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12373     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12374     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12375     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12376     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12377     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12378     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12379     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12380     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12381     Register Ptr1Reg;
12382     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12383     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12384     //  thisMBB:
12385     //   ...
12386     //   fallthrough --> loopMBB
12387     BB->addSuccessor(loop1MBB);
12388 
12389     // The 4-byte load must be aligned, while a char or short may be
12390     // anywhere in the word.  Hence all this nasty bookkeeping code.
12391     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12392     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12393     //   xori shift, shift1, 24 [16]
12394     //   rlwinm ptr, ptr1, 0, 0, 29
12395     //   slw newval2, newval, shift
12396     //   slw oldval2, oldval,shift
12397     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12398     //   slw mask, mask2, shift
12399     //   and newval3, newval2, mask
12400     //   and oldval3, oldval2, mask
12401     // loop1MBB:
12402     //   lwarx tmpDest, ptr
12403     //   and tmp, tmpDest, mask
12404     //   cmpw tmp, oldval3
12405     //   bne- midMBB
12406     // loop2MBB:
12407     //   andc tmp2, tmpDest, mask
12408     //   or tmp4, tmp2, newval3
12409     //   stwcx. tmp4, ptr
12410     //   bne- loop1MBB
12411     //   b exitBB
12412     // midMBB:
12413     //   stwcx. tmpDest, ptr
12414     // exitBB:
12415     //   srw dest, tmpDest, shift
12416     if (ptrA != ZeroReg) {
12417       Ptr1Reg = RegInfo.createVirtualRegister(RC);
12418       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12419           .addReg(ptrA)
12420           .addReg(ptrB);
12421     } else {
12422       Ptr1Reg = ptrB;
12423     }
12424 
12425     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12426     // mode.
12427     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12428         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12429         .addImm(3)
12430         .addImm(27)
12431         .addImm(is8bit ? 28 : 27);
12432     if (!isLittleEndian)
12433       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12434           .addReg(Shift1Reg)
12435           .addImm(is8bit ? 24 : 16);
12436     if (is64bit)
12437       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12438           .addReg(Ptr1Reg)
12439           .addImm(0)
12440           .addImm(61);
12441     else
12442       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12443           .addReg(Ptr1Reg)
12444           .addImm(0)
12445           .addImm(0)
12446           .addImm(29);
12447     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12448         .addReg(newval)
12449         .addReg(ShiftReg);
12450     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12451         .addReg(oldval)
12452         .addReg(ShiftReg);
12453     if (is8bit)
12454       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12455     else {
12456       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12457       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12458           .addReg(Mask3Reg)
12459           .addImm(65535);
12460     }
12461     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12462         .addReg(Mask2Reg)
12463         .addReg(ShiftReg);
12464     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12465         .addReg(NewVal2Reg)
12466         .addReg(MaskReg);
12467     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12468         .addReg(OldVal2Reg)
12469         .addReg(MaskReg);
12470 
12471     BB = loop1MBB;
12472     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12473         .addReg(ZeroReg)
12474         .addReg(PtrReg);
12475     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12476         .addReg(TmpDestReg)
12477         .addReg(MaskReg);
12478     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12479         .addReg(TmpReg)
12480         .addReg(OldVal3Reg);
12481     BuildMI(BB, dl, TII->get(PPC::BCC))
12482         .addImm(PPC::PRED_NE)
12483         .addReg(PPC::CR0)
12484         .addMBB(midMBB);
12485     BB->addSuccessor(loop2MBB);
12486     BB->addSuccessor(midMBB);
12487 
12488     BB = loop2MBB;
12489     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12490         .addReg(TmpDestReg)
12491         .addReg(MaskReg);
12492     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12493         .addReg(Tmp2Reg)
12494         .addReg(NewVal3Reg);
12495     BuildMI(BB, dl, TII->get(PPC::STWCX))
12496         .addReg(Tmp4Reg)
12497         .addReg(ZeroReg)
12498         .addReg(PtrReg);
12499     BuildMI(BB, dl, TII->get(PPC::BCC))
12500         .addImm(PPC::PRED_NE)
12501         .addReg(PPC::CR0)
12502         .addMBB(loop1MBB);
12503     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12504     BB->addSuccessor(loop1MBB);
12505     BB->addSuccessor(exitMBB);
12506 
12507     BB = midMBB;
12508     BuildMI(BB, dl, TII->get(PPC::STWCX))
12509         .addReg(TmpDestReg)
12510         .addReg(ZeroReg)
12511         .addReg(PtrReg);
12512     BB->addSuccessor(exitMBB);
12513 
12514     //  exitMBB:
12515     //   ...
12516     BB = exitMBB;
12517     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12518         .addReg(TmpReg)
12519         .addReg(ShiftReg);
12520   } else if (MI.getOpcode() == PPC::FADDrtz) {
12521     // This pseudo performs an FADD with rounding mode temporarily forced
12522     // to round-to-zero.  We emit this via custom inserter since the FPSCR
12523     // is not modeled at the SelectionDAG level.
12524     Register Dest = MI.getOperand(0).getReg();
12525     Register Src1 = MI.getOperand(1).getReg();
12526     Register Src2 = MI.getOperand(2).getReg();
12527     DebugLoc dl = MI.getDebugLoc();
12528 
12529     MachineRegisterInfo &RegInfo = F->getRegInfo();
12530     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12531 
12532     // Save FPSCR value.
12533     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12534 
12535     // Set rounding mode to round-to-zero.
12536     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12537         .addImm(31)
12538         .addReg(PPC::RM, RegState::ImplicitDefine);
12539 
12540     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12541         .addImm(30)
12542         .addReg(PPC::RM, RegState::ImplicitDefine);
12543 
12544     // Perform addition.
12545     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12546                    .addReg(Src1)
12547                    .addReg(Src2);
12548     if (MI.getFlag(MachineInstr::NoFPExcept))
12549       MIB.setMIFlag(MachineInstr::NoFPExcept);
12550 
12551     // Restore FPSCR value.
12552     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12553   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12554              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12555              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12556              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12557     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12558                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12559                           ? PPC::ANDI8_rec
12560                           : PPC::ANDI_rec;
12561     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12562                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12563 
12564     MachineRegisterInfo &RegInfo = F->getRegInfo();
12565     Register Dest = RegInfo.createVirtualRegister(
12566         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12567 
12568     DebugLoc Dl = MI.getDebugLoc();
12569     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12570         .addReg(MI.getOperand(1).getReg())
12571         .addImm(1);
12572     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12573             MI.getOperand(0).getReg())
12574         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12575   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12576     DebugLoc Dl = MI.getDebugLoc();
12577     MachineRegisterInfo &RegInfo = F->getRegInfo();
12578     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12579     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12580     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12581             MI.getOperand(0).getReg())
12582         .addReg(CRReg);
12583   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12584     DebugLoc Dl = MI.getDebugLoc();
12585     unsigned Imm = MI.getOperand(1).getImm();
12586     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12587     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12588             MI.getOperand(0).getReg())
12589         .addReg(PPC::CR0EQ);
12590   } else if (MI.getOpcode() == PPC::SETRNDi) {
12591     DebugLoc dl = MI.getDebugLoc();
12592     Register OldFPSCRReg = MI.getOperand(0).getReg();
12593 
12594     // Save FPSCR value.
12595     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12596 
12597     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12598     // the following settings:
12599     //   00 Round to nearest
12600     //   01 Round to 0
12601     //   10 Round to +inf
12602     //   11 Round to -inf
12603 
12604     // When the operand is immediate, using the two least significant bits of
12605     // the immediate to set the bits 62:63 of FPSCR.
12606     unsigned Mode = MI.getOperand(1).getImm();
12607     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12608         .addImm(31)
12609         .addReg(PPC::RM, RegState::ImplicitDefine);
12610 
12611     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12612         .addImm(30)
12613         .addReg(PPC::RM, RegState::ImplicitDefine);
12614   } else if (MI.getOpcode() == PPC::SETRND) {
12615     DebugLoc dl = MI.getDebugLoc();
12616 
12617     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12618     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12619     // If the target doesn't have DirectMove, we should use stack to do the
12620     // conversion, because the target doesn't have the instructions like mtvsrd
12621     // or mfvsrd to do this conversion directly.
12622     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12623       if (Subtarget.hasDirectMove()) {
12624         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12625           .addReg(SrcReg);
12626       } else {
12627         // Use stack to do the register copy.
12628         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12629         MachineRegisterInfo &RegInfo = F->getRegInfo();
12630         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12631         if (RC == &PPC::F8RCRegClass) {
12632           // Copy register from F8RCRegClass to G8RCRegclass.
12633           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12634                  "Unsupported RegClass.");
12635 
12636           StoreOp = PPC::STFD;
12637           LoadOp = PPC::LD;
12638         } else {
12639           // Copy register from G8RCRegClass to F8RCRegclass.
12640           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12641                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12642                  "Unsupported RegClass.");
12643         }
12644 
12645         MachineFrameInfo &MFI = F->getFrameInfo();
12646         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12647 
12648         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12649             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12650             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12651             MFI.getObjectAlign(FrameIdx));
12652 
12653         // Store the SrcReg into the stack.
12654         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12655           .addReg(SrcReg)
12656           .addImm(0)
12657           .addFrameIndex(FrameIdx)
12658           .addMemOperand(MMOStore);
12659 
12660         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12661             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12662             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12663             MFI.getObjectAlign(FrameIdx));
12664 
12665         // Load from the stack where SrcReg is stored, and save to DestReg,
12666         // so we have done the RegClass conversion from RegClass::SrcReg to
12667         // RegClass::DestReg.
12668         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12669           .addImm(0)
12670           .addFrameIndex(FrameIdx)
12671           .addMemOperand(MMOLoad);
12672       }
12673     };
12674 
12675     Register OldFPSCRReg = MI.getOperand(0).getReg();
12676 
12677     // Save FPSCR value.
12678     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12679 
12680     // When the operand is gprc register, use two least significant bits of the
12681     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12682     //
12683     // copy OldFPSCRTmpReg, OldFPSCRReg
12684     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12685     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12686     // copy NewFPSCRReg, NewFPSCRTmpReg
12687     // mtfsf 255, NewFPSCRReg
12688     MachineOperand SrcOp = MI.getOperand(1);
12689     MachineRegisterInfo &RegInfo = F->getRegInfo();
12690     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12691 
12692     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12693 
12694     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12695     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12696 
12697     // The first operand of INSERT_SUBREG should be a register which has
12698     // subregisters, we only care about its RegClass, so we should use an
12699     // IMPLICIT_DEF register.
12700     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12701     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12702       .addReg(ImDefReg)
12703       .add(SrcOp)
12704       .addImm(1);
12705 
12706     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12707     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12708       .addReg(OldFPSCRTmpReg)
12709       .addReg(ExtSrcReg)
12710       .addImm(0)
12711       .addImm(62);
12712 
12713     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12714     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12715 
12716     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12717     // bits of FPSCR.
12718     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12719       .addImm(255)
12720       .addReg(NewFPSCRReg)
12721       .addImm(0)
12722       .addImm(0);
12723   } else if (MI.getOpcode() == PPC::SETFLM) {
12724     DebugLoc Dl = MI.getDebugLoc();
12725 
12726     // Result of setflm is previous FPSCR content, so we need to save it first.
12727     Register OldFPSCRReg = MI.getOperand(0).getReg();
12728     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12729 
12730     // Put bits in 32:63 to FPSCR.
12731     Register NewFPSCRReg = MI.getOperand(1).getReg();
12732     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12733         .addImm(255)
12734         .addReg(NewFPSCRReg)
12735         .addImm(0)
12736         .addImm(0);
12737   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12738              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12739     return emitProbedAlloca(MI, BB);
12740   } else {
12741     llvm_unreachable("Unexpected instr type to insert");
12742   }
12743 
12744   MI.eraseFromParent(); // The pseudo instruction is gone now.
12745   return BB;
12746 }
12747 
12748 //===----------------------------------------------------------------------===//
12749 // Target Optimization Hooks
12750 //===----------------------------------------------------------------------===//
12751 
12752 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12753   // For the estimates, convergence is quadratic, so we essentially double the
12754   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12755   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12756   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12757   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12758   if (VT.getScalarType() == MVT::f64)
12759     RefinementSteps++;
12760   return RefinementSteps;
12761 }
12762 
12763 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12764                                             const DenormalMode &Mode) const {
12765   // TODO - add support for v2f64/v4f32
12766   EVT VT = Op.getValueType();
12767   if (VT != MVT::f64)
12768     return SDValue();
12769 
12770   SDLoc DL(Op);
12771   // The output register of FTSQRT is CR field.
12772   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12773   // ftsqrt BF,FRB
12774   // Let e_b be the unbiased exponent of the double-precision
12775   // floating-point operand in register FRB.
12776   // fe_flag is set to 1 if either of the following conditions occurs.
12777   //   - The double-precision floating-point operand in register FRB is a zero,
12778   //     a NaN, or an infinity, or a negative value.
12779   //   - e_b is less than or equal to -970.
12780   // Otherwise fe_flag is set to 0.
12781   // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
12782   // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12783   // exponent is less than -970)
12784   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12785   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12786                                     FTSQRT, SRIdxVal),
12787                  0);
12788 }
12789 
12790 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12791                                            int Enabled, int &RefinementSteps,
12792                                            bool &UseOneConstNR,
12793                                            bool Reciprocal) const {
12794   EVT VT = Operand.getValueType();
12795   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12796       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12797       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12798       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12799     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12800       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12801 
12802     // The Newton-Raphson computation with a single constant does not provide
12803     // enough accuracy on some CPUs.
12804     UseOneConstNR = !Subtarget.needsTwoConstNR();
12805     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12806   }
12807   return SDValue();
12808 }
12809 
12810 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12811                                             int Enabled,
12812                                             int &RefinementSteps) const {
12813   EVT VT = Operand.getValueType();
12814   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12815       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12816       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12817       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12818     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12819       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12820     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12821   }
12822   return SDValue();
12823 }
12824 
12825 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12826   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12827   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12828   // enabled for division), this functionality is redundant with the default
12829   // combiner logic (once the division -> reciprocal/multiply transformation
12830   // has taken place). As a result, this matters more for older cores than for
12831   // newer ones.
12832 
12833   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12834   // reciprocal if there are two or more FDIVs (for embedded cores with only
12835   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12836   switch (Subtarget.getCPUDirective()) {
12837   default:
12838     return 3;
12839   case PPC::DIR_440:
12840   case PPC::DIR_A2:
12841   case PPC::DIR_E500:
12842   case PPC::DIR_E500mc:
12843   case PPC::DIR_E5500:
12844     return 2;
12845   }
12846 }
12847 
12848 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12849 // collapsed, and so we need to look through chains of them.
12850 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12851                                      int64_t& Offset, SelectionDAG &DAG) {
12852   if (DAG.isBaseWithConstantOffset(Loc)) {
12853     Base = Loc.getOperand(0);
12854     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12855 
12856     // The base might itself be a base plus an offset, and if so, accumulate
12857     // that as well.
12858     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12859   }
12860 }
12861 
12862 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12863                             unsigned Bytes, int Dist,
12864                             SelectionDAG &DAG) {
12865   if (VT.getSizeInBits() / 8 != Bytes)
12866     return false;
12867 
12868   SDValue BaseLoc = Base->getBasePtr();
12869   if (Loc.getOpcode() == ISD::FrameIndex) {
12870     if (BaseLoc.getOpcode() != ISD::FrameIndex)
12871       return false;
12872     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12873     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
12874     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12875     int FS  = MFI.getObjectSize(FI);
12876     int BFS = MFI.getObjectSize(BFI);
12877     if (FS != BFS || FS != (int)Bytes) return false;
12878     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12879   }
12880 
12881   SDValue Base1 = Loc, Base2 = BaseLoc;
12882   int64_t Offset1 = 0, Offset2 = 0;
12883   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12884   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12885   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12886     return true;
12887 
12888   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12889   const GlobalValue *GV1 = nullptr;
12890   const GlobalValue *GV2 = nullptr;
12891   Offset1 = 0;
12892   Offset2 = 0;
12893   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12894   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12895   if (isGA1 && isGA2 && GV1 == GV2)
12896     return Offset1 == (Offset2 + Dist*Bytes);
12897   return false;
12898 }
12899 
12900 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12901 // not enforce equality of the chain operands.
12902 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12903                             unsigned Bytes, int Dist,
12904                             SelectionDAG &DAG) {
12905   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12906     EVT VT = LS->getMemoryVT();
12907     SDValue Loc = LS->getBasePtr();
12908     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12909   }
12910 
12911   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12912     EVT VT;
12913     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12914     default: return false;
12915     case Intrinsic::ppc_altivec_lvx:
12916     case Intrinsic::ppc_altivec_lvxl:
12917     case Intrinsic::ppc_vsx_lxvw4x:
12918     case Intrinsic::ppc_vsx_lxvw4x_be:
12919       VT = MVT::v4i32;
12920       break;
12921     case Intrinsic::ppc_vsx_lxvd2x:
12922     case Intrinsic::ppc_vsx_lxvd2x_be:
12923       VT = MVT::v2f64;
12924       break;
12925     case Intrinsic::ppc_altivec_lvebx:
12926       VT = MVT::i8;
12927       break;
12928     case Intrinsic::ppc_altivec_lvehx:
12929       VT = MVT::i16;
12930       break;
12931     case Intrinsic::ppc_altivec_lvewx:
12932       VT = MVT::i32;
12933       break;
12934     }
12935 
12936     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12937   }
12938 
12939   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12940     EVT VT;
12941     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12942     default: return false;
12943     case Intrinsic::ppc_altivec_stvx:
12944     case Intrinsic::ppc_altivec_stvxl:
12945     case Intrinsic::ppc_vsx_stxvw4x:
12946       VT = MVT::v4i32;
12947       break;
12948     case Intrinsic::ppc_vsx_stxvd2x:
12949       VT = MVT::v2f64;
12950       break;
12951     case Intrinsic::ppc_vsx_stxvw4x_be:
12952       VT = MVT::v4i32;
12953       break;
12954     case Intrinsic::ppc_vsx_stxvd2x_be:
12955       VT = MVT::v2f64;
12956       break;
12957     case Intrinsic::ppc_altivec_stvebx:
12958       VT = MVT::i8;
12959       break;
12960     case Intrinsic::ppc_altivec_stvehx:
12961       VT = MVT::i16;
12962       break;
12963     case Intrinsic::ppc_altivec_stvewx:
12964       VT = MVT::i32;
12965       break;
12966     }
12967 
12968     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12969   }
12970 
12971   return false;
12972 }
12973 
12974 // Return true is there is a nearyby consecutive load to the one provided
12975 // (regardless of alignment). We search up and down the chain, looking though
12976 // token factors and other loads (but nothing else). As a result, a true result
12977 // indicates that it is safe to create a new consecutive load adjacent to the
12978 // load provided.
12979 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12980   SDValue Chain = LD->getChain();
12981   EVT VT = LD->getMemoryVT();
12982 
12983   SmallSet<SDNode *, 16> LoadRoots;
12984   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12985   SmallSet<SDNode *, 16> Visited;
12986 
12987   // First, search up the chain, branching to follow all token-factor operands.
12988   // If we find a consecutive load, then we're done, otherwise, record all
12989   // nodes just above the top-level loads and token factors.
12990   while (!Queue.empty()) {
12991     SDNode *ChainNext = Queue.pop_back_val();
12992     if (!Visited.insert(ChainNext).second)
12993       continue;
12994 
12995     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12996       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12997         return true;
12998 
12999       if (!Visited.count(ChainLD->getChain().getNode()))
13000         Queue.push_back(ChainLD->getChain().getNode());
13001     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13002       for (const SDUse &O : ChainNext->ops())
13003         if (!Visited.count(O.getNode()))
13004           Queue.push_back(O.getNode());
13005     } else
13006       LoadRoots.insert(ChainNext);
13007   }
13008 
13009   // Second, search down the chain, starting from the top-level nodes recorded
13010   // in the first phase. These top-level nodes are the nodes just above all
13011   // loads and token factors. Starting with their uses, recursively look though
13012   // all loads (just the chain uses) and token factors to find a consecutive
13013   // load.
13014   Visited.clear();
13015   Queue.clear();
13016 
13017   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13018        IE = LoadRoots.end(); I != IE; ++I) {
13019     Queue.push_back(*I);
13020 
13021     while (!Queue.empty()) {
13022       SDNode *LoadRoot = Queue.pop_back_val();
13023       if (!Visited.insert(LoadRoot).second)
13024         continue;
13025 
13026       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13027         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13028           return true;
13029 
13030       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
13031            UE = LoadRoot->use_end(); UI != UE; ++UI)
13032         if (((isa<MemSDNode>(*UI) &&
13033             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
13034             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
13035           Queue.push_back(*UI);
13036     }
13037   }
13038 
13039   return false;
13040 }
13041 
13042 /// This function is called when we have proved that a SETCC node can be replaced
13043 /// by subtraction (and other supporting instructions) so that the result of
13044 /// comparison is kept in a GPR instead of CR. This function is purely for
13045 /// codegen purposes and has some flags to guide the codegen process.
13046 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13047                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13048   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13049 
13050   // Zero extend the operands to the largest legal integer. Originally, they
13051   // must be of a strictly smaller size.
13052   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13053                          DAG.getConstant(Size, DL, MVT::i32));
13054   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13055                          DAG.getConstant(Size, DL, MVT::i32));
13056 
13057   // Swap if needed. Depends on the condition code.
13058   if (Swap)
13059     std::swap(Op0, Op1);
13060 
13061   // Subtract extended integers.
13062   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13063 
13064   // Move the sign bit to the least significant position and zero out the rest.
13065   // Now the least significant bit carries the result of original comparison.
13066   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13067                              DAG.getConstant(Size - 1, DL, MVT::i32));
13068   auto Final = Shifted;
13069 
13070   // Complement the result if needed. Based on the condition code.
13071   if (Complement)
13072     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13073                         DAG.getConstant(1, DL, MVT::i64));
13074 
13075   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13076 }
13077 
13078 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13079                                                   DAGCombinerInfo &DCI) const {
13080   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13081 
13082   SelectionDAG &DAG = DCI.DAG;
13083   SDLoc DL(N);
13084 
13085   // Size of integers being compared has a critical role in the following
13086   // analysis, so we prefer to do this when all types are legal.
13087   if (!DCI.isAfterLegalizeDAG())
13088     return SDValue();
13089 
13090   // If all users of SETCC extend its value to a legal integer type
13091   // then we replace SETCC with a subtraction
13092   for (SDNode::use_iterator UI = N->use_begin(),
13093        UE = N->use_end(); UI != UE; ++UI) {
13094     if (UI->getOpcode() != ISD::ZERO_EXTEND)
13095       return SDValue();
13096   }
13097 
13098   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13099   auto OpSize = N->getOperand(0).getValueSizeInBits();
13100 
13101   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13102 
13103   if (OpSize < Size) {
13104     switch (CC) {
13105     default: break;
13106     case ISD::SETULT:
13107       return generateEquivalentSub(N, Size, false, false, DL, DAG);
13108     case ISD::SETULE:
13109       return generateEquivalentSub(N, Size, true, true, DL, DAG);
13110     case ISD::SETUGT:
13111       return generateEquivalentSub(N, Size, false, true, DL, DAG);
13112     case ISD::SETUGE:
13113       return generateEquivalentSub(N, Size, true, false, DL, DAG);
13114     }
13115   }
13116 
13117   return SDValue();
13118 }
13119 
13120 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13121                                                   DAGCombinerInfo &DCI) const {
13122   SelectionDAG &DAG = DCI.DAG;
13123   SDLoc dl(N);
13124 
13125   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13126   // If we're tracking CR bits, we need to be careful that we don't have:
13127   //   trunc(binary-ops(zext(x), zext(y)))
13128   // or
13129   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13130   // such that we're unnecessarily moving things into GPRs when it would be
13131   // better to keep them in CR bits.
13132 
13133   // Note that trunc here can be an actual i1 trunc, or can be the effective
13134   // truncation that comes from a setcc or select_cc.
13135   if (N->getOpcode() == ISD::TRUNCATE &&
13136       N->getValueType(0) != MVT::i1)
13137     return SDValue();
13138 
13139   if (N->getOperand(0).getValueType() != MVT::i32 &&
13140       N->getOperand(0).getValueType() != MVT::i64)
13141     return SDValue();
13142 
13143   if (N->getOpcode() == ISD::SETCC ||
13144       N->getOpcode() == ISD::SELECT_CC) {
13145     // If we're looking at a comparison, then we need to make sure that the
13146     // high bits (all except for the first) don't matter the result.
13147     ISD::CondCode CC =
13148       cast<CondCodeSDNode>(N->getOperand(
13149         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13150     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13151 
13152     if (ISD::isSignedIntSetCC(CC)) {
13153       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13154           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13155         return SDValue();
13156     } else if (ISD::isUnsignedIntSetCC(CC)) {
13157       if (!DAG.MaskedValueIsZero(N->getOperand(0),
13158                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13159           !DAG.MaskedValueIsZero(N->getOperand(1),
13160                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
13161         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13162                                              : SDValue());
13163     } else {
13164       // This is neither a signed nor an unsigned comparison, just make sure
13165       // that the high bits are equal.
13166       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13167       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13168 
13169       // We don't really care about what is known about the first bit (if
13170       // anything), so clear it in all masks prior to comparing them.
13171       Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
13172       Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);
13173 
13174       if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
13175         return SDValue();
13176     }
13177   }
13178 
13179   // We now know that the higher-order bits are irrelevant, we just need to
13180   // make sure that all of the intermediate operations are bit operations, and
13181   // all inputs are extensions.
13182   if (N->getOperand(0).getOpcode() != ISD::AND &&
13183       N->getOperand(0).getOpcode() != ISD::OR  &&
13184       N->getOperand(0).getOpcode() != ISD::XOR &&
13185       N->getOperand(0).getOpcode() != ISD::SELECT &&
13186       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13187       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13188       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13189       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13190       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13191     return SDValue();
13192 
13193   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13194       N->getOperand(1).getOpcode() != ISD::AND &&
13195       N->getOperand(1).getOpcode() != ISD::OR  &&
13196       N->getOperand(1).getOpcode() != ISD::XOR &&
13197       N->getOperand(1).getOpcode() != ISD::SELECT &&
13198       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13199       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13200       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13201       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13202       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13203     return SDValue();
13204 
13205   SmallVector<SDValue, 4> Inputs;
13206   SmallVector<SDValue, 8> BinOps, PromOps;
13207   SmallPtrSet<SDNode *, 16> Visited;
13208 
13209   for (unsigned i = 0; i < 2; ++i) {
13210     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13211           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13212           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13213           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13214         isa<ConstantSDNode>(N->getOperand(i)))
13215       Inputs.push_back(N->getOperand(i));
13216     else
13217       BinOps.push_back(N->getOperand(i));
13218 
13219     if (N->getOpcode() == ISD::TRUNCATE)
13220       break;
13221   }
13222 
13223   // Visit all inputs, collect all binary operations (and, or, xor and
13224   // select) that are all fed by extensions.
13225   while (!BinOps.empty()) {
13226     SDValue BinOp = BinOps.back();
13227     BinOps.pop_back();
13228 
13229     if (!Visited.insert(BinOp.getNode()).second)
13230       continue;
13231 
13232     PromOps.push_back(BinOp);
13233 
13234     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13235       // The condition of the select is not promoted.
13236       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13237         continue;
13238       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13239         continue;
13240 
13241       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13242             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13243             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13244            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13245           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13246         Inputs.push_back(BinOp.getOperand(i));
13247       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13248                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13249                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13250                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13251                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13252                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13253                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13254                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13255                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13256         BinOps.push_back(BinOp.getOperand(i));
13257       } else {
13258         // We have an input that is not an extension or another binary
13259         // operation; we'll abort this transformation.
13260         return SDValue();
13261       }
13262     }
13263   }
13264 
13265   // Make sure that this is a self-contained cluster of operations (which
13266   // is not quite the same thing as saying that everything has only one
13267   // use).
13268   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13269     if (isa<ConstantSDNode>(Inputs[i]))
13270       continue;
13271 
13272     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13273                               UE = Inputs[i].getNode()->use_end();
13274          UI != UE; ++UI) {
13275       SDNode *User = *UI;
13276       if (User != N && !Visited.count(User))
13277         return SDValue();
13278 
13279       // Make sure that we're not going to promote the non-output-value
13280       // operand(s) or SELECT or SELECT_CC.
13281       // FIXME: Although we could sometimes handle this, and it does occur in
13282       // practice that one of the condition inputs to the select is also one of
13283       // the outputs, we currently can't deal with this.
13284       if (User->getOpcode() == ISD::SELECT) {
13285         if (User->getOperand(0) == Inputs[i])
13286           return SDValue();
13287       } else if (User->getOpcode() == ISD::SELECT_CC) {
13288         if (User->getOperand(0) == Inputs[i] ||
13289             User->getOperand(1) == Inputs[i])
13290           return SDValue();
13291       }
13292     }
13293   }
13294 
13295   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13296     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13297                               UE = PromOps[i].getNode()->use_end();
13298          UI != UE; ++UI) {
13299       SDNode *User = *UI;
13300       if (User != N && !Visited.count(User))
13301         return SDValue();
13302 
13303       // Make sure that we're not going to promote the non-output-value
13304       // operand(s) or SELECT or SELECT_CC.
13305       // FIXME: Although we could sometimes handle this, and it does occur in
13306       // practice that one of the condition inputs to the select is also one of
13307       // the outputs, we currently can't deal with this.
13308       if (User->getOpcode() == ISD::SELECT) {
13309         if (User->getOperand(0) == PromOps[i])
13310           return SDValue();
13311       } else if (User->getOpcode() == ISD::SELECT_CC) {
13312         if (User->getOperand(0) == PromOps[i] ||
13313             User->getOperand(1) == PromOps[i])
13314           return SDValue();
13315       }
13316     }
13317   }
13318 
13319   // Replace all inputs with the extension operand.
13320   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13321     // Constants may have users outside the cluster of to-be-promoted nodes,
13322     // and so we need to replace those as we do the promotions.
13323     if (isa<ConstantSDNode>(Inputs[i]))
13324       continue;
13325     else
13326       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13327   }
13328 
13329   std::list<HandleSDNode> PromOpHandles;
13330   for (auto &PromOp : PromOps)
13331     PromOpHandles.emplace_back(PromOp);
13332 
13333   // Replace all operations (these are all the same, but have a different
13334   // (i1) return type). DAG.getNode will validate that the types of
13335   // a binary operator match, so go through the list in reverse so that
13336   // we've likely promoted both operands first. Any intermediate truncations or
13337   // extensions disappear.
13338   while (!PromOpHandles.empty()) {
13339     SDValue PromOp = PromOpHandles.back().getValue();
13340     PromOpHandles.pop_back();
13341 
13342     if (PromOp.getOpcode() == ISD::TRUNCATE ||
13343         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
13344         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
13345         PromOp.getOpcode() == ISD::ANY_EXTEND) {
13346       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
13347           PromOp.getOperand(0).getValueType() != MVT::i1) {
13348         // The operand is not yet ready (see comment below).
13349         PromOpHandles.emplace_front(PromOp);
13350         continue;
13351       }
13352 
13353       SDValue RepValue = PromOp.getOperand(0);
13354       if (isa<ConstantSDNode>(RepValue))
13355         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
13356 
13357       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
13358       continue;
13359     }
13360 
13361     unsigned C;
13362     switch (PromOp.getOpcode()) {
13363     default:             C = 0; break;
13364     case ISD::SELECT:    C = 1; break;
13365     case ISD::SELECT_CC: C = 2; break;
13366     }
13367 
13368     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13369          PromOp.getOperand(C).getValueType() != MVT::i1) ||
13370         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13371          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
13372       // The to-be-promoted operands of this node have not yet been
13373       // promoted (this should be rare because we're going through the
13374       // list backward, but if one of the operands has several users in
13375       // this cluster of to-be-promoted nodes, it is possible).
13376       PromOpHandles.emplace_front(PromOp);
13377       continue;
13378     }
13379 
13380     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13381                                 PromOp.getNode()->op_end());
13382 
13383     // If there are any constant inputs, make sure they're replaced now.
13384     for (unsigned i = 0; i < 2; ++i)
13385       if (isa<ConstantSDNode>(Ops[C+i]))
13386         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
13387 
13388     DAG.ReplaceAllUsesOfValueWith(PromOp,
13389       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
13390   }
13391 
13392   // Now we're left with the initial truncation itself.
13393   if (N->getOpcode() == ISD::TRUNCATE)
13394     return N->getOperand(0);
13395 
13396   // Otherwise, this is a comparison. The operands to be compared have just
13397   // changed type (to i1), but everything else is the same.
13398   return SDValue(N, 0);
13399 }
13400 
13401 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13402                                                   DAGCombinerInfo &DCI) const {
13403   SelectionDAG &DAG = DCI.DAG;
13404   SDLoc dl(N);
13405 
13406   // If we're tracking CR bits, we need to be careful that we don't have:
13407   //   zext(binary-ops(trunc(x), trunc(y)))
13408   // or
13409   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13410   // such that we're unnecessarily moving things into CR bits that can more
13411   // efficiently stay in GPRs. Note that if we're not certain that the high
13412   // bits are set as required by the final extension, we still may need to do
13413   // some masking to get the proper behavior.
13414 
13415   // This same functionality is important on PPC64 when dealing with
13416   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13417   // the return values of functions. Because it is so similar, it is handled
13418   // here as well.
13419 
13420   if (N->getValueType(0) != MVT::i32 &&
13421       N->getValueType(0) != MVT::i64)
13422     return SDValue();
13423 
13424   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13425         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13426     return SDValue();
13427 
13428   if (N->getOperand(0).getOpcode() != ISD::AND &&
13429       N->getOperand(0).getOpcode() != ISD::OR  &&
13430       N->getOperand(0).getOpcode() != ISD::XOR &&
13431       N->getOperand(0).getOpcode() != ISD::SELECT &&
13432       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13433     return SDValue();
13434 
13435   SmallVector<SDValue, 4> Inputs;
13436   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13437   SmallPtrSet<SDNode *, 16> Visited;
13438 
13439   // Visit all inputs, collect all binary operations (and, or, xor and
13440   // select) that are all fed by truncations.
13441   while (!BinOps.empty()) {
13442     SDValue BinOp = BinOps.back();
13443     BinOps.pop_back();
13444 
13445     if (!Visited.insert(BinOp.getNode()).second)
13446       continue;
13447 
13448     PromOps.push_back(BinOp);
13449 
13450     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13451       // The condition of the select is not promoted.
13452       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13453         continue;
13454       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13455         continue;
13456 
13457       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13458           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13459         Inputs.push_back(BinOp.getOperand(i));
13460       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13461                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13462                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13463                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13464                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13465         BinOps.push_back(BinOp.getOperand(i));
13466       } else {
13467         // We have an input that is not a truncation or another binary
13468         // operation; we'll abort this transformation.
13469         return SDValue();
13470       }
13471     }
13472   }
13473 
13474   // The operands of a select that must be truncated when the select is
13475   // promoted because the operand is actually part of the to-be-promoted set.
13476   DenseMap<SDNode *, EVT> SelectTruncOp[2];
13477 
13478   // Make sure that this is a self-contained cluster of operations (which
13479   // is not quite the same thing as saying that everything has only one
13480   // use).
13481   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13482     if (isa<ConstantSDNode>(Inputs[i]))
13483       continue;
13484 
13485     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
13486                               UE = Inputs[i].getNode()->use_end();
13487          UI != UE; ++UI) {
13488       SDNode *User = *UI;
13489       if (User != N && !Visited.count(User))
13490         return SDValue();
13491 
13492       // If we're going to promote the non-output-value operand(s) or SELECT or
13493       // SELECT_CC, record them for truncation.
13494       if (User->getOpcode() == ISD::SELECT) {
13495         if (User->getOperand(0) == Inputs[i])
13496           SelectTruncOp[0].insert(std::make_pair(User,
13497                                     User->getOperand(0).getValueType()));
13498       } else if (User->getOpcode() == ISD::SELECT_CC) {
13499         if (User->getOperand(0) == Inputs[i])
13500           SelectTruncOp[0].insert(std::make_pair(User,
13501                                     User->getOperand(0).getValueType()));
13502         if (User->getOperand(1) == Inputs[i])
13503           SelectTruncOp[1].insert(std::make_pair(User,
13504                                     User->getOperand(1).getValueType()));
13505       }
13506     }
13507   }
13508 
13509   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13510     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
13511                               UE = PromOps[i].getNode()->use_end();
13512          UI != UE; ++UI) {
13513       SDNode *User = *UI;
13514       if (User != N && !Visited.count(User))
13515         return SDValue();
13516 
13517       // If we're going to promote the non-output-value operand(s) or SELECT or
13518       // SELECT_CC, record them for truncation.
13519       if (User->getOpcode() == ISD::SELECT) {
13520         if (User->getOperand(0) == PromOps[i])
13521           SelectTruncOp[0].insert(std::make_pair(User,
13522                                     User->getOperand(0).getValueType()));
13523       } else if (User->getOpcode() == ISD::SELECT_CC) {
13524         if (User->getOperand(0) == PromOps[i])
13525           SelectTruncOp[0].insert(std::make_pair(User,
13526                                     User->getOperand(0).getValueType()));
13527         if (User->getOperand(1) == PromOps[i])
13528           SelectTruncOp[1].insert(std::make_pair(User,
13529                                     User->getOperand(1).getValueType()));
13530       }
13531     }
13532   }
13533 
13534   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13535   bool ReallyNeedsExt = false;
13536   if (N->getOpcode() != ISD::ANY_EXTEND) {
13537     // If all of the inputs are not already sign/zero extended, then
13538     // we'll still need to do that at the end.
13539     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13540       if (isa<ConstantSDNode>(Inputs[i]))
13541         continue;
13542 
13543       unsigned OpBits =
13544         Inputs[i].getOperand(0).getValueSizeInBits();
13545       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13546 
13547       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13548            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13549                                   APInt::getHighBitsSet(OpBits,
13550                                                         OpBits-PromBits))) ||
13551           (N->getOpcode() == ISD::SIGN_EXTEND &&
13552            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13553              (OpBits-(PromBits-1)))) {
13554         ReallyNeedsExt = true;
13555         break;
13556       }
13557     }
13558   }
13559 
13560   // Replace all inputs, either with the truncation operand, or a
13561   // truncation or extension to the final output type.
13562   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13563     // Constant inputs need to be replaced with the to-be-promoted nodes that
13564     // use them because they might have users outside of the cluster of
13565     // promoted nodes.
13566     if (isa<ConstantSDNode>(Inputs[i]))
13567       continue;
13568 
13569     SDValue InSrc = Inputs[i].getOperand(0);
13570     if (Inputs[i].getValueType() == N->getValueType(0))
13571       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13572     else if (N->getOpcode() == ISD::SIGN_EXTEND)
13573       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13574         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13575     else if (N->getOpcode() == ISD::ZERO_EXTEND)
13576       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13577         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13578     else
13579       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13580         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13581   }
13582 
13583   std::list<HandleSDNode> PromOpHandles;
13584   for (auto &PromOp : PromOps)
13585     PromOpHandles.emplace_back(PromOp);
13586 
13587   // Replace all operations (these are all the same, but have a different
13588   // (promoted) return type). DAG.getNode will validate that the types of
13589   // a binary operator match, so go through the list in reverse so that
13590   // we've likely promoted both operands first.
13591   while (!PromOpHandles.empty()) {
13592     SDValue PromOp = PromOpHandles.back().getValue();
13593     PromOpHandles.pop_back();
13594 
13595     unsigned C;
13596     switch (PromOp.getOpcode()) {
13597     default:             C = 0; break;
13598     case ISD::SELECT:    C = 1; break;
13599     case ISD::SELECT_CC: C = 2; break;
13600     }
13601 
13602     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13603          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13604         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13605          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13606       // The to-be-promoted operands of this node have not yet been
13607       // promoted (this should be rare because we're going through the
13608       // list backward, but if one of the operands has several users in
13609       // this cluster of to-be-promoted nodes, it is possible).
13610       PromOpHandles.emplace_front(PromOp);
13611       continue;
13612     }
13613 
13614     // For SELECT and SELECT_CC nodes, we do a similar check for any
13615     // to-be-promoted comparison inputs.
13616     if (PromOp.getOpcode() == ISD::SELECT ||
13617         PromOp.getOpcode() == ISD::SELECT_CC) {
13618       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13619            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13620           (SelectTruncOp[1].count(PromOp.getNode()) &&
13621            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13622         PromOpHandles.emplace_front(PromOp);
13623         continue;
13624       }
13625     }
13626 
13627     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13628                                 PromOp.getNode()->op_end());
13629 
13630     // If this node has constant inputs, then they'll need to be promoted here.
13631     for (unsigned i = 0; i < 2; ++i) {
13632       if (!isa<ConstantSDNode>(Ops[C+i]))
13633         continue;
13634       if (Ops[C+i].getValueType() == N->getValueType(0))
13635         continue;
13636 
13637       if (N->getOpcode() == ISD::SIGN_EXTEND)
13638         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13639       else if (N->getOpcode() == ISD::ZERO_EXTEND)
13640         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13641       else
13642         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13643     }
13644 
13645     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13646     // truncate them again to the original value type.
13647     if (PromOp.getOpcode() == ISD::SELECT ||
13648         PromOp.getOpcode() == ISD::SELECT_CC) {
13649       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13650       if (SI0 != SelectTruncOp[0].end())
13651         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13652       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13653       if (SI1 != SelectTruncOp[1].end())
13654         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13655     }
13656 
13657     DAG.ReplaceAllUsesOfValueWith(PromOp,
13658       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13659   }
13660 
13661   // Now we're left with the initial extension itself.
13662   if (!ReallyNeedsExt)
13663     return N->getOperand(0);
13664 
13665   // To zero extend, just mask off everything except for the first bit (in the
13666   // i1 case).
13667   if (N->getOpcode() == ISD::ZERO_EXTEND)
13668     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13669                        DAG.getConstant(APInt::getLowBitsSet(
13670                                          N->getValueSizeInBits(0), PromBits),
13671                                        dl, N->getValueType(0)));
13672 
13673   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13674          "Invalid extension type");
13675   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13676   SDValue ShiftCst =
13677       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13678   return DAG.getNode(
13679       ISD::SRA, dl, N->getValueType(0),
13680       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13681       ShiftCst);
13682 }
13683 
13684 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13685                                         DAGCombinerInfo &DCI) const {
13686   assert(N->getOpcode() == ISD::SETCC &&
13687          "Should be called with a SETCC node");
13688 
13689   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13690   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13691     SDValue LHS = N->getOperand(0);
13692     SDValue RHS = N->getOperand(1);
13693 
13694     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13695     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13696         LHS.hasOneUse())
13697       std::swap(LHS, RHS);
13698 
13699     // x == 0-y --> x+y == 0
13700     // x != 0-y --> x+y != 0
13701     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13702         RHS.hasOneUse()) {
13703       SDLoc DL(N);
13704       SelectionDAG &DAG = DCI.DAG;
13705       EVT VT = N->getValueType(0);
13706       EVT OpVT = LHS.getValueType();
13707       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13708       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13709     }
13710   }
13711 
13712   return DAGCombineTruncBoolExt(N, DCI);
13713 }
13714 
13715 // Is this an extending load from an f32 to an f64?
13716 static bool isFPExtLoad(SDValue Op) {
13717   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13718     return LD->getExtensionType() == ISD::EXTLOAD &&
13719       Op.getValueType() == MVT::f64;
13720   return false;
13721 }
13722 
13723 /// Reduces the number of fp-to-int conversion when building a vector.
13724 ///
13725 /// If this vector is built out of floating to integer conversions,
13726 /// transform it to a vector built out of floating point values followed by a
13727 /// single floating to integer conversion of the vector.
13728 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
13729 /// becomes (fptosi (build_vector ($A, $B, ...)))
13730 SDValue PPCTargetLowering::
13731 combineElementTruncationToVectorTruncation(SDNode *N,
13732                                            DAGCombinerInfo &DCI) const {
13733   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13734          "Should be called with a BUILD_VECTOR node");
13735 
13736   SelectionDAG &DAG = DCI.DAG;
13737   SDLoc dl(N);
13738 
13739   SDValue FirstInput = N->getOperand(0);
13740   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13741          "The input operand must be an fp-to-int conversion.");
13742 
13743   // This combine happens after legalization so the fp_to_[su]i nodes are
13744   // already converted to PPCSISD nodes.
13745   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13746   if (FirstConversion == PPCISD::FCTIDZ ||
13747       FirstConversion == PPCISD::FCTIDUZ ||
13748       FirstConversion == PPCISD::FCTIWZ ||
13749       FirstConversion == PPCISD::FCTIWUZ) {
13750     bool IsSplat = true;
13751     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13752       FirstConversion == PPCISD::FCTIWUZ;
13753     EVT SrcVT = FirstInput.getOperand(0).getValueType();
13754     SmallVector<SDValue, 4> Ops;
13755     EVT TargetVT = N->getValueType(0);
13756     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13757       SDValue NextOp = N->getOperand(i);
13758       if (NextOp.getOpcode() != PPCISD::MFVSR)
13759         return SDValue();
13760       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13761       if (NextConversion != FirstConversion)
13762         return SDValue();
13763       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13764       // This is not valid if the input was originally double precision. It is
13765       // also not profitable to do unless this is an extending load in which
13766       // case doing this combine will allow us to combine consecutive loads.
13767       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13768         return SDValue();
13769       if (N->getOperand(i) != FirstInput)
13770         IsSplat = false;
13771     }
13772 
13773     // If this is a splat, we leave it as-is since there will be only a single
13774     // fp-to-int conversion followed by a splat of the integer. This is better
13775     // for 32-bit and smaller ints and neutral for 64-bit ints.
13776     if (IsSplat)
13777       return SDValue();
13778 
13779     // Now that we know we have the right type of node, get its operands
13780     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13781       SDValue In = N->getOperand(i).getOperand(0);
13782       if (Is32Bit) {
13783         // For 32-bit values, we need to add an FP_ROUND node (if we made it
13784         // here, we know that all inputs are extending loads so this is safe).
13785         if (In.isUndef())
13786           Ops.push_back(DAG.getUNDEF(SrcVT));
13787         else {
13788           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13789                                       MVT::f32, In.getOperand(0),
13790                                       DAG.getIntPtrConstant(1, dl));
13791           Ops.push_back(Trunc);
13792         }
13793       } else
13794         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13795     }
13796 
13797     unsigned Opcode;
13798     if (FirstConversion == PPCISD::FCTIDZ ||
13799         FirstConversion == PPCISD::FCTIWZ)
13800       Opcode = ISD::FP_TO_SINT;
13801     else
13802       Opcode = ISD::FP_TO_UINT;
13803 
13804     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13805     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13806     return DAG.getNode(Opcode, dl, TargetVT, BV);
13807   }
13808   return SDValue();
13809 }
13810 
13811 /// Reduce the number of loads when building a vector.
13812 ///
13813 /// Building a vector out of multiple loads can be converted to a load
13814 /// of the vector type if the loads are consecutive. If the loads are
13815 /// consecutive but in descending order, a shuffle is added at the end
13816 /// to reorder the vector.
13817 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13818   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13819          "Should be called with a BUILD_VECTOR node");
13820 
13821   SDLoc dl(N);
13822 
13823   // Return early for non byte-sized type, as they can't be consecutive.
13824   if (!N->getValueType(0).getVectorElementType().isByteSized())
13825     return SDValue();
13826 
13827   bool InputsAreConsecutiveLoads = true;
13828   bool InputsAreReverseConsecutive = true;
13829   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13830   SDValue FirstInput = N->getOperand(0);
13831   bool IsRoundOfExtLoad = false;
13832 
13833   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13834       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13835     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13836     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13837   }
13838   // Not a build vector of (possibly fp_rounded) loads.
13839   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13840       N->getNumOperands() == 1)
13841     return SDValue();
13842 
13843   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13844     // If any inputs are fp_round(extload), they all must be.
13845     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13846       return SDValue();
13847 
13848     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13849       N->getOperand(i);
13850     if (NextInput.getOpcode() != ISD::LOAD)
13851       return SDValue();
13852 
13853     SDValue PreviousInput =
13854       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13855     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13856     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13857 
13858     // If any inputs are fp_round(extload), they all must be.
13859     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13860       return SDValue();
13861 
13862     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13863       InputsAreConsecutiveLoads = false;
13864     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13865       InputsAreReverseConsecutive = false;
13866 
13867     // Exit early if the loads are neither consecutive nor reverse consecutive.
13868     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13869       return SDValue();
13870   }
13871 
13872   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13873          "The loads cannot be both consecutive and reverse consecutive.");
13874 
13875   SDValue FirstLoadOp =
13876     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13877   SDValue LastLoadOp =
13878     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13879                        N->getOperand(N->getNumOperands()-1);
13880 
13881   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13882   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13883   if (InputsAreConsecutiveLoads) {
13884     assert(LD1 && "Input needs to be a LoadSDNode.");
13885     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13886                        LD1->getBasePtr(), LD1->getPointerInfo(),
13887                        LD1->getAlignment());
13888   }
13889   if (InputsAreReverseConsecutive) {
13890     assert(LDL && "Input needs to be a LoadSDNode.");
13891     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13892                                LDL->getBasePtr(), LDL->getPointerInfo(),
13893                                LDL->getAlignment());
13894     SmallVector<int, 16> Ops;
13895     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13896       Ops.push_back(i);
13897 
13898     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13899                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13900   }
13901   return SDValue();
13902 }
13903 
13904 // This function adds the required vector_shuffle needed to get
13905 // the elements of the vector extract in the correct position
13906 // as specified by the CorrectElems encoding.
13907 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13908                                       SDValue Input, uint64_t Elems,
13909                                       uint64_t CorrectElems) {
13910   SDLoc dl(N);
13911 
13912   unsigned NumElems = Input.getValueType().getVectorNumElements();
13913   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13914 
13915   // Knowing the element indices being extracted from the original
13916   // vector and the order in which they're being inserted, just put
13917   // them at element indices required for the instruction.
13918   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13919     if (DAG.getDataLayout().isLittleEndian())
13920       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13921     else
13922       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13923     CorrectElems = CorrectElems >> 8;
13924     Elems = Elems >> 8;
13925   }
13926 
13927   SDValue Shuffle =
13928       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13929                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13930 
13931   EVT VT = N->getValueType(0);
13932   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13933 
13934   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13935                                Input.getValueType().getVectorElementType(),
13936                                VT.getVectorNumElements());
13937   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13938                      DAG.getValueType(ExtVT));
13939 }
13940 
13941 // Look for build vector patterns where input operands come from sign
13942 // extended vector_extract elements of specific indices. If the correct indices
13943 // aren't used, add a vector shuffle to fix up the indices and create
13944 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13945 // during instruction selection.
13946 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13947   // This array encodes the indices that the vector sign extend instructions
13948   // extract from when extending from one type to another for both BE and LE.
13949   // The right nibble of each byte corresponds to the LE incides.
13950   // and the left nibble of each byte corresponds to the BE incides.
13951   // For example: 0x3074B8FC  byte->word
13952   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13953   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13954   // For example: 0x000070F8  byte->double word
13955   // For LE: the allowed indices are: 0x0,0x8
13956   // For BE: the allowed indices are: 0x7,0xF
13957   uint64_t TargetElems[] = {
13958       0x3074B8FC, // b->w
13959       0x000070F8, // b->d
13960       0x10325476, // h->w
13961       0x00003074, // h->d
13962       0x00001032, // w->d
13963   };
13964 
13965   uint64_t Elems = 0;
13966   int Index;
13967   SDValue Input;
13968 
13969   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13970     if (!Op)
13971       return false;
13972     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13973         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13974       return false;
13975 
13976     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13977     // of the right width.
13978     SDValue Extract = Op.getOperand(0);
13979     if (Extract.getOpcode() == ISD::ANY_EXTEND)
13980       Extract = Extract.getOperand(0);
13981     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13982       return false;
13983 
13984     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13985     if (!ExtOp)
13986       return false;
13987 
13988     Index = ExtOp->getZExtValue();
13989     if (Input && Input != Extract.getOperand(0))
13990       return false;
13991 
13992     if (!Input)
13993       Input = Extract.getOperand(0);
13994 
13995     Elems = Elems << 8;
13996     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13997     Elems |= Index;
13998 
13999     return true;
14000   };
14001 
14002   // If the build vector operands aren't sign extended vector extracts,
14003   // of the same input vector, then return.
14004   for (unsigned i = 0; i < N->getNumOperands(); i++) {
14005     if (!isSExtOfVecExtract(N->getOperand(i))) {
14006       return SDValue();
14007     }
14008   }
14009 
14010   // If the vector extract indicies are not correct, add the appropriate
14011   // vector_shuffle.
14012   int TgtElemArrayIdx;
14013   int InputSize = Input.getValueType().getScalarSizeInBits();
14014   int OutputSize = N->getValueType(0).getScalarSizeInBits();
14015   if (InputSize + OutputSize == 40)
14016     TgtElemArrayIdx = 0;
14017   else if (InputSize + OutputSize == 72)
14018     TgtElemArrayIdx = 1;
14019   else if (InputSize + OutputSize == 48)
14020     TgtElemArrayIdx = 2;
14021   else if (InputSize + OutputSize == 80)
14022     TgtElemArrayIdx = 3;
14023   else if (InputSize + OutputSize == 96)
14024     TgtElemArrayIdx = 4;
14025   else
14026     return SDValue();
14027 
14028   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14029   CorrectElems = DAG.getDataLayout().isLittleEndian()
14030                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14031                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14032   if (Elems != CorrectElems) {
14033     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14034   }
14035 
14036   // Regular lowering will catch cases where a shuffle is not needed.
14037   return SDValue();
14038 }
14039 
14040 // Look for the pattern of a load from a narrow width to i128, feeding
14041 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14042 // (LXVRZX). This node represents a zero extending load that will be matched
14043 // to the Load VSX Vector Rightmost instructions.
14044 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14045   SDLoc DL(N);
14046 
14047   // This combine is only eligible for a BUILD_VECTOR of v1i128.
14048   if (N->getValueType(0) != MVT::v1i128)
14049     return SDValue();
14050 
14051   SDValue Operand = N->getOperand(0);
14052   // Proceed with the transformation if the operand to the BUILD_VECTOR
14053   // is a load instruction.
14054   if (Operand.getOpcode() != ISD::LOAD)
14055     return SDValue();
14056 
14057   LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
14058   EVT MemoryType = LD->getMemoryVT();
14059 
14060   // This transformation is only valid if the we are loading either a byte,
14061   // halfword, word, or doubleword.
14062   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14063                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
14064 
14065   // Ensure that the load from the narrow width is being zero extended to i128.
14066   if (!ValidLDType ||
14067       (LD->getExtensionType() != ISD::ZEXTLOAD &&
14068        LD->getExtensionType() != ISD::EXTLOAD))
14069     return SDValue();
14070 
14071   SDValue LoadOps[] = {
14072       LD->getChain(), LD->getBasePtr(),
14073       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14074 
14075   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14076                                  DAG.getVTList(MVT::v1i128, MVT::Other),
14077                                  LoadOps, MemoryType, LD->getMemOperand());
14078 }
14079 
14080 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14081                                                  DAGCombinerInfo &DCI) const {
14082   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14083          "Should be called with a BUILD_VECTOR node");
14084 
14085   SelectionDAG &DAG = DCI.DAG;
14086   SDLoc dl(N);
14087 
14088   if (!Subtarget.hasVSX())
14089     return SDValue();
14090 
14091   // The target independent DAG combiner will leave a build_vector of
14092   // float-to-int conversions intact. We can generate MUCH better code for
14093   // a float-to-int conversion of a vector of floats.
14094   SDValue FirstInput = N->getOperand(0);
14095   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14096     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14097     if (Reduced)
14098       return Reduced;
14099   }
14100 
14101   // If we're building a vector out of consecutive loads, just load that
14102   // vector type.
14103   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14104   if (Reduced)
14105     return Reduced;
14106 
14107   // If we're building a vector out of extended elements from another vector
14108   // we have P9 vector integer extend instructions. The code assumes legal
14109   // input types (i.e. it can't handle things like v4i16) so do not run before
14110   // legalization.
14111   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14112     Reduced = combineBVOfVecSExt(N, DAG);
14113     if (Reduced)
14114       return Reduced;
14115   }
14116 
14117   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14118   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14119   // is a load from <valid narrow width> to i128.
14120   if (Subtarget.isISA3_1()) {
14121     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14122     if (BVOfZLoad)
14123       return BVOfZLoad;
14124   }
14125 
14126   if (N->getValueType(0) != MVT::v2f64)
14127     return SDValue();
14128 
14129   // Looking for:
14130   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14131   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14132       FirstInput.getOpcode() != ISD::UINT_TO_FP)
14133     return SDValue();
14134   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14135       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14136     return SDValue();
14137   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14138     return SDValue();
14139 
14140   SDValue Ext1 = FirstInput.getOperand(0);
14141   SDValue Ext2 = N->getOperand(1).getOperand(0);
14142   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14143      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14144     return SDValue();
14145 
14146   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14147   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14148   if (!Ext1Op || !Ext2Op)
14149     return SDValue();
14150   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14151       Ext1.getOperand(0) != Ext2.getOperand(0))
14152     return SDValue();
14153 
14154   int FirstElem = Ext1Op->getZExtValue();
14155   int SecondElem = Ext2Op->getZExtValue();
14156   int SubvecIdx;
14157   if (FirstElem == 0 && SecondElem == 1)
14158     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14159   else if (FirstElem == 2 && SecondElem == 3)
14160     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14161   else
14162     return SDValue();
14163 
14164   SDValue SrcVec = Ext1.getOperand(0);
14165   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14166     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14167   return DAG.getNode(NodeType, dl, MVT::v2f64,
14168                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14169 }
14170 
14171 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14172                                               DAGCombinerInfo &DCI) const {
14173   assert((N->getOpcode() == ISD::SINT_TO_FP ||
14174           N->getOpcode() == ISD::UINT_TO_FP) &&
14175          "Need an int -> FP conversion node here");
14176 
14177   if (useSoftFloat() || !Subtarget.has64BitSupport())
14178     return SDValue();
14179 
14180   SelectionDAG &DAG = DCI.DAG;
14181   SDLoc dl(N);
14182   SDValue Op(N, 0);
14183 
14184   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14185   // from the hardware.
14186   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14187     return SDValue();
14188   if (!Op.getOperand(0).getValueType().isSimple())
14189     return SDValue();
14190   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14191       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14192     return SDValue();
14193 
14194   SDValue FirstOperand(Op.getOperand(0));
14195   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14196     (FirstOperand.getValueType() == MVT::i8 ||
14197      FirstOperand.getValueType() == MVT::i16);
14198   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14199     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14200     bool DstDouble = Op.getValueType() == MVT::f64;
14201     unsigned ConvOp = Signed ?
14202       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
14203       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14204     SDValue WidthConst =
14205       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14206                             dl, false);
14207     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14208     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14209     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14210                                          DAG.getVTList(MVT::f64, MVT::Other),
14211                                          Ops, MVT::i8, LDN->getMemOperand());
14212 
14213     // For signed conversion, we need to sign-extend the value in the VSR
14214     if (Signed) {
14215       SDValue ExtOps[] = { Ld, WidthConst };
14216       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14217       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14218     } else
14219       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14220   }
14221 
14222 
14223   // For i32 intermediate values, unfortunately, the conversion functions
14224   // leave the upper 32 bits of the value are undefined. Within the set of
14225   // scalar instructions, we have no method for zero- or sign-extending the
14226   // value. Thus, we cannot handle i32 intermediate values here.
14227   if (Op.getOperand(0).getValueType() == MVT::i32)
14228     return SDValue();
14229 
14230   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14231          "UINT_TO_FP is supported only with FPCVT");
14232 
14233   // If we have FCFIDS, then use it when converting to single-precision.
14234   // Otherwise, convert to double-precision and then round.
14235   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14236                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14237                                                             : PPCISD::FCFIDS)
14238                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14239                                                             : PPCISD::FCFID);
14240   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14241                   ? MVT::f32
14242                   : MVT::f64;
14243 
14244   // If we're converting from a float, to an int, and back to a float again,
14245   // then we don't need the store/load pair at all.
14246   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14247        Subtarget.hasFPCVT()) ||
14248       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14249     SDValue Src = Op.getOperand(0).getOperand(0);
14250     if (Src.getValueType() == MVT::f32) {
14251       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14252       DCI.AddToWorklist(Src.getNode());
14253     } else if (Src.getValueType() != MVT::f64) {
14254       // Make sure that we don't pick up a ppc_fp128 source value.
14255       return SDValue();
14256     }
14257 
14258     unsigned FCTOp =
14259       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14260                                                         PPCISD::FCTIDUZ;
14261 
14262     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14263     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14264 
14265     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14266       FP = DAG.getNode(ISD::FP_ROUND, dl,
14267                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14268       DCI.AddToWorklist(FP.getNode());
14269     }
14270 
14271     return FP;
14272   }
14273 
14274   return SDValue();
14275 }
14276 
14277 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14278 // builtins) into loads with swaps.
14279 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14280                                               DAGCombinerInfo &DCI) const {
14281   SelectionDAG &DAG = DCI.DAG;
14282   SDLoc dl(N);
14283   SDValue Chain;
14284   SDValue Base;
14285   MachineMemOperand *MMO;
14286 
14287   switch (N->getOpcode()) {
14288   default:
14289     llvm_unreachable("Unexpected opcode for little endian VSX load");
14290   case ISD::LOAD: {
14291     LoadSDNode *LD = cast<LoadSDNode>(N);
14292     Chain = LD->getChain();
14293     Base = LD->getBasePtr();
14294     MMO = LD->getMemOperand();
14295     // If the MMO suggests this isn't a load of a full vector, leave
14296     // things alone.  For a built-in, we have to make the change for
14297     // correctness, so if there is a size problem that will be a bug.
14298     if (MMO->getSize() < 16)
14299       return SDValue();
14300     break;
14301   }
14302   case ISD::INTRINSIC_W_CHAIN: {
14303     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14304     Chain = Intrin->getChain();
14305     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14306     // us what we want. Get operand 2 instead.
14307     Base = Intrin->getOperand(2);
14308     MMO = Intrin->getMemOperand();
14309     break;
14310   }
14311   }
14312 
14313   MVT VecTy = N->getValueType(0).getSimpleVT();
14314 
14315   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14316   // aligned and the type is a vector with elements up to 4 bytes
14317   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14318       VecTy.getScalarSizeInBits() <= 32) {
14319     return SDValue();
14320   }
14321 
14322   SDValue LoadOps[] = { Chain, Base };
14323   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14324                                          DAG.getVTList(MVT::v2f64, MVT::Other),
14325                                          LoadOps, MVT::v2f64, MMO);
14326 
14327   DCI.AddToWorklist(Load.getNode());
14328   Chain = Load.getValue(1);
14329   SDValue Swap = DAG.getNode(
14330       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14331   DCI.AddToWorklist(Swap.getNode());
14332 
14333   // Add a bitcast if the resulting load type doesn't match v2f64.
14334   if (VecTy != MVT::v2f64) {
14335     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14336     DCI.AddToWorklist(N.getNode());
14337     // Package {bitcast value, swap's chain} to match Load's shape.
14338     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14339                        N, Swap.getValue(1));
14340   }
14341 
14342   return Swap;
14343 }
14344 
14345 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14346 // builtins) into stores with swaps.
14347 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
14348                                                DAGCombinerInfo &DCI) const {
14349   SelectionDAG &DAG = DCI.DAG;
14350   SDLoc dl(N);
14351   SDValue Chain;
14352   SDValue Base;
14353   unsigned SrcOpnd;
14354   MachineMemOperand *MMO;
14355 
14356   switch (N->getOpcode()) {
14357   default:
14358     llvm_unreachable("Unexpected opcode for little endian VSX store");
14359   case ISD::STORE: {
14360     StoreSDNode *ST = cast<StoreSDNode>(N);
14361     Chain = ST->getChain();
14362     Base = ST->getBasePtr();
14363     MMO = ST->getMemOperand();
14364     SrcOpnd = 1;
14365     // If the MMO suggests this isn't a store of a full vector, leave
14366     // things alone.  For a built-in, we have to make the change for
14367     // correctness, so if there is a size problem that will be a bug.
14368     if (MMO->getSize() < 16)
14369       return SDValue();
14370     break;
14371   }
14372   case ISD::INTRINSIC_VOID: {
14373     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14374     Chain = Intrin->getChain();
14375     // Intrin->getBasePtr() oddly does not get what we want.
14376     Base = Intrin->getOperand(3);
14377     MMO = Intrin->getMemOperand();
14378     SrcOpnd = 2;
14379     break;
14380   }
14381   }
14382 
14383   SDValue Src = N->getOperand(SrcOpnd);
14384   MVT VecTy = Src.getValueType().getSimpleVT();
14385 
14386   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
14387   // aligned and the type is a vector with elements up to 4 bytes
14388   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14389       VecTy.getScalarSizeInBits() <= 32) {
14390     return SDValue();
14391   }
14392 
14393   // All stores are done as v2f64 and possible bit cast.
14394   if (VecTy != MVT::v2f64) {
14395     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14396     DCI.AddToWorklist(Src.getNode());
14397   }
14398 
14399   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14400                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14401   DCI.AddToWorklist(Swap.getNode());
14402   Chain = Swap.getValue(1);
14403   SDValue StoreOps[] = { Chain, Swap, Base };
14404   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
14405                                           DAG.getVTList(MVT::Other),
14406                                           StoreOps, VecTy, MMO);
14407   DCI.AddToWorklist(Store.getNode());
14408   return Store;
14409 }
14410 
14411 // Handle DAG combine for STORE (FP_TO_INT F).
14412 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14413                                                DAGCombinerInfo &DCI) const {
14414 
14415   SelectionDAG &DAG = DCI.DAG;
14416   SDLoc dl(N);
14417   unsigned Opcode = N->getOperand(1).getOpcode();
14418 
14419   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14420          && "Not a FP_TO_INT Instruction!");
14421 
14422   SDValue Val = N->getOperand(1).getOperand(0);
14423   EVT Op1VT = N->getOperand(1).getValueType();
14424   EVT ResVT = Val.getValueType();
14425 
14426   if (!isTypeLegal(ResVT))
14427     return SDValue();
14428 
14429   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14430   bool ValidTypeForStoreFltAsInt =
14431         (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14432          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14433 
14434   if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14435       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14436     return SDValue();
14437 
14438   // Extend f32 values to f64
14439   if (ResVT.getScalarSizeInBits() == 32) {
14440     Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14441     DCI.AddToWorklist(Val.getNode());
14442   }
14443 
14444   // Set signed or unsigned conversion opcode.
14445   unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14446                           PPCISD::FP_TO_SINT_IN_VSR :
14447                           PPCISD::FP_TO_UINT_IN_VSR;
14448 
14449   Val = DAG.getNode(ConvOpcode,
14450                     dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14451   DCI.AddToWorklist(Val.getNode());
14452 
14453   // Set number of bytes being converted.
14454   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14455   SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14456                     DAG.getIntPtrConstant(ByteSize, dl, false),
14457                     DAG.getValueType(Op1VT) };
14458 
14459   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
14460           DAG.getVTList(MVT::Other), Ops,
14461           cast<StoreSDNode>(N)->getMemoryVT(),
14462           cast<StoreSDNode>(N)->getMemOperand());
14463 
14464   DCI.AddToWorklist(Val.getNode());
14465   return Val;
14466 }
14467 
14468 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14469   // Check that the source of the element keeps flipping
14470   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14471   bool PrevElemFromFirstVec = Mask[0] < NumElts;
14472   for (int i = 1, e = Mask.size(); i < e; i++) {
14473     if (PrevElemFromFirstVec && Mask[i] < NumElts)
14474       return false;
14475     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14476       return false;
14477     PrevElemFromFirstVec = !PrevElemFromFirstVec;
14478   }
14479   return true;
14480 }
14481 
14482 static bool isSplatBV(SDValue Op) {
14483   if (Op.getOpcode() != ISD::BUILD_VECTOR)
14484     return false;
14485   SDValue FirstOp;
14486 
14487   // Find first non-undef input.
14488   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14489     FirstOp = Op.getOperand(i);
14490     if (!FirstOp.isUndef())
14491       break;
14492   }
14493 
14494   // All inputs are undef or the same as the first non-undef input.
14495   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14496     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14497       return false;
14498   return true;
14499 }
14500 
14501 static SDValue isScalarToVec(SDValue Op) {
14502   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14503     return Op;
14504   if (Op.getOpcode() != ISD::BITCAST)
14505     return SDValue();
14506   Op = Op.getOperand(0);
14507   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14508     return Op;
14509   return SDValue();
14510 }
14511 
14512 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
14513                                             int LHSMaxIdx, int RHSMinIdx,
14514                                             int RHSMaxIdx, int HalfVec) {
14515   for (int i = 0, e = ShuffV.size(); i < e; i++) {
14516     int Idx = ShuffV[i];
14517     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14518       ShuffV[i] += HalfVec;
14519   }
14520   return;
14521 }
14522 
14523 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14524 // the original is:
14525 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14526 // In such a case, just change the shuffle mask to extract the element
14527 // from the permuted index.
14528 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
14529   SDLoc dl(OrigSToV);
14530   EVT VT = OrigSToV.getValueType();
14531   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14532          "Expecting a SCALAR_TO_VECTOR here");
14533   SDValue Input = OrigSToV.getOperand(0);
14534 
14535   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14536     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14537     SDValue OrigVector = Input.getOperand(0);
14538 
14539     // Can't handle non-const element indices or different vector types
14540     // for the input to the extract and the output of the scalar_to_vector.
14541     if (Idx && VT == OrigVector.getValueType()) {
14542       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
14543       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
14544       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14545     }
14546   }
14547   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14548                      OrigSToV.getOperand(0));
14549 }
14550 
14551 // On little endian subtargets, combine shuffles such as:
14552 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14553 // into:
14554 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14555 // because the latter can be matched to a single instruction merge.
14556 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14557 // to put the value into element zero. Adjust the shuffle mask so that the
14558 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
14559 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14560                                                 SelectionDAG &DAG) const {
14561   SDValue LHS = SVN->getOperand(0);
14562   SDValue RHS = SVN->getOperand(1);
14563   auto Mask = SVN->getMask();
14564   int NumElts = LHS.getValueType().getVectorNumElements();
14565   SDValue Res(SVN, 0);
14566   SDLoc dl(SVN);
14567 
14568   // None of these combines are useful on big endian systems since the ISA
14569   // already has a big endian bias.
14570   if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14571     return Res;
14572 
14573   // If this is not a shuffle of a shuffle and the first element comes from
14574   // the second vector, canonicalize to the commuted form. This will make it
14575   // more likely to match one of the single instruction patterns.
14576   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14577       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14578     std::swap(LHS, RHS);
14579     Res = DAG.getCommutedVectorShuffle(*SVN);
14580     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14581   }
14582 
14583   // Adjust the shuffle mask if either input vector comes from a
14584   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14585   // form (to prevent the need for a swap).
14586   SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14587   SDValue SToVLHS = isScalarToVec(LHS);
14588   SDValue SToVRHS = isScalarToVec(RHS);
14589   if (SToVLHS || SToVRHS) {
14590     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14591                             : SToVRHS.getValueType().getVectorNumElements();
14592     int NumEltsOut = ShuffV.size();
14593 
14594     // Initially assume that neither input is permuted. These will be adjusted
14595     // accordingly if either input is.
14596     int LHSMaxIdx = -1;
14597     int RHSMinIdx = -1;
14598     int RHSMaxIdx = -1;
14599     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14600 
14601     // Get the permuted scalar to vector nodes for the source(s) that come from
14602     // ISD::SCALAR_TO_VECTOR.
14603     if (SToVLHS) {
14604       // Set up the values for the shuffle vector fixup.
14605       LHSMaxIdx = NumEltsOut / NumEltsIn;
14606       SToVLHS = getSToVPermuted(SToVLHS, DAG);
14607       if (SToVLHS.getValueType() != LHS.getValueType())
14608         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14609       LHS = SToVLHS;
14610     }
14611     if (SToVRHS) {
14612       RHSMinIdx = NumEltsOut;
14613       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14614       SToVRHS = getSToVPermuted(SToVRHS, DAG);
14615       if (SToVRHS.getValueType() != RHS.getValueType())
14616         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14617       RHS = SToVRHS;
14618     }
14619 
14620     // Fix up the shuffle mask to reflect where the desired element actually is.
14621     // The minimum and maximum indices that correspond to element zero for both
14622     // the LHS and RHS are computed and will control which shuffle mask entries
14623     // are to be changed. For example, if the RHS is permuted, any shuffle mask
14624     // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14625     // HalfVec to refer to the corresponding element in the permuted vector.
14626     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14627                                     HalfVec);
14628     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14629 
14630     // We may have simplified away the shuffle. We won't be able to do anything
14631     // further with it here.
14632     if (!isa<ShuffleVectorSDNode>(Res))
14633       return Res;
14634     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14635   }
14636 
14637   // The common case after we commuted the shuffle is that the RHS is a splat
14638   // and we have elements coming in from the splat at indices that are not
14639   // conducive to using a merge.
14640   // Example:
14641   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14642   if (!isSplatBV(RHS))
14643     return Res;
14644 
14645   // We are looking for a mask such that all even elements are from
14646   // one vector and all odd elements from the other.
14647   if (!isAlternatingShuffMask(Mask, NumElts))
14648     return Res;
14649 
14650   // Adjust the mask so we are pulling in the same index from the splat
14651   // as the index from the interesting vector in consecutive elements.
14652   // Example (even elements from first vector):
14653   // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14654   if (Mask[0] < NumElts)
14655     for (int i = 1, e = Mask.size(); i < e; i += 2)
14656       ShuffV[i] = (ShuffV[i - 1] + NumElts);
14657   // Example (odd elements from first vector):
14658   // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14659   else
14660     for (int i = 0, e = Mask.size(); i < e; i += 2)
14661       ShuffV[i] = (ShuffV[i + 1] + NumElts);
14662 
14663   // If the RHS has undefs, we need to remove them since we may have created
14664   // a shuffle that adds those instead of the splat value.
14665   SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14666   RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14667 
14668   Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14669   return Res;
14670 }
14671 
14672 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14673                                                 LSBaseSDNode *LSBase,
14674                                                 DAGCombinerInfo &DCI) const {
14675   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14676         "Not a reverse memop pattern!");
14677 
14678   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14679     auto Mask = SVN->getMask();
14680     int i = 0;
14681     auto I = Mask.rbegin();
14682     auto E = Mask.rend();
14683 
14684     for (; I != E; ++I) {
14685       if (*I != i)
14686         return false;
14687       i++;
14688     }
14689     return true;
14690   };
14691 
14692   SelectionDAG &DAG = DCI.DAG;
14693   EVT VT = SVN->getValueType(0);
14694 
14695   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14696     return SDValue();
14697 
14698   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14699   // See comment in PPCVSXSwapRemoval.cpp.
14700   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14701   if (!Subtarget.hasP9Vector())
14702     return SDValue();
14703 
14704   if(!IsElementReverse(SVN))
14705     return SDValue();
14706 
14707   if (LSBase->getOpcode() == ISD::LOAD) {
14708     SDLoc dl(SVN);
14709     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14710     return DAG.getMemIntrinsicNode(
14711         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14712         LSBase->getMemoryVT(), LSBase->getMemOperand());
14713   }
14714 
14715   if (LSBase->getOpcode() == ISD::STORE) {
14716     SDLoc dl(LSBase);
14717     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14718                           LSBase->getBasePtr()};
14719     return DAG.getMemIntrinsicNode(
14720         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14721         LSBase->getMemoryVT(), LSBase->getMemOperand());
14722   }
14723 
14724   llvm_unreachable("Expected a load or store node here");
14725 }
14726 
14727 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14728                                              DAGCombinerInfo &DCI) const {
14729   SelectionDAG &DAG = DCI.DAG;
14730   SDLoc dl(N);
14731   switch (N->getOpcode()) {
14732   default: break;
14733   case ISD::ADD:
14734     return combineADD(N, DCI);
14735   case ISD::SHL:
14736     return combineSHL(N, DCI);
14737   case ISD::SRA:
14738     return combineSRA(N, DCI);
14739   case ISD::SRL:
14740     return combineSRL(N, DCI);
14741   case ISD::MUL:
14742     return combineMUL(N, DCI);
14743   case ISD::FMA:
14744   case PPCISD::FNMSUB:
14745     return combineFMALike(N, DCI);
14746   case PPCISD::SHL:
14747     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14748         return N->getOperand(0);
14749     break;
14750   case PPCISD::SRL:
14751     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14752         return N->getOperand(0);
14753     break;
14754   case PPCISD::SRA:
14755     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14756       if (C->isNullValue() ||   //  0 >>s V -> 0.
14757           C->isAllOnesValue())    // -1 >>s V -> -1.
14758         return N->getOperand(0);
14759     }
14760     break;
14761   case ISD::SIGN_EXTEND:
14762   case ISD::ZERO_EXTEND:
14763   case ISD::ANY_EXTEND:
14764     return DAGCombineExtBoolTrunc(N, DCI);
14765   case ISD::TRUNCATE:
14766     return combineTRUNCATE(N, DCI);
14767   case ISD::SETCC:
14768     if (SDValue CSCC = combineSetCC(N, DCI))
14769       return CSCC;
14770     LLVM_FALLTHROUGH;
14771   case ISD::SELECT_CC:
14772     return DAGCombineTruncBoolExt(N, DCI);
14773   case ISD::SINT_TO_FP:
14774   case ISD::UINT_TO_FP:
14775     return combineFPToIntToFP(N, DCI);
14776   case ISD::VECTOR_SHUFFLE:
14777     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14778       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14779       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14780     }
14781     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14782   case ISD::STORE: {
14783 
14784     EVT Op1VT = N->getOperand(1).getValueType();
14785     unsigned Opcode = N->getOperand(1).getOpcode();
14786 
14787     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14788       SDValue Val= combineStoreFPToInt(N, DCI);
14789       if (Val)
14790         return Val;
14791     }
14792 
14793     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14794       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14795       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14796       if (Val)
14797         return Val;
14798     }
14799 
14800     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14801     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14802         N->getOperand(1).getNode()->hasOneUse() &&
14803         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14804          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14805 
14806       // STBRX can only handle simple types and it makes no sense to store less
14807       // two bytes in byte-reversed order.
14808       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14809       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14810         break;
14811 
14812       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14813       // Do an any-extend to 32-bits if this is a half-word input.
14814       if (BSwapOp.getValueType() == MVT::i16)
14815         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14816 
14817       // If the type of BSWAP operand is wider than stored memory width
14818       // it need to be shifted to the right side before STBRX.
14819       if (Op1VT.bitsGT(mVT)) {
14820         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14821         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14822                               DAG.getConstant(Shift, dl, MVT::i32));
14823         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14824         if (Op1VT == MVT::i64)
14825           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14826       }
14827 
14828       SDValue Ops[] = {
14829         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14830       };
14831       return
14832         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14833                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14834                                 cast<StoreSDNode>(N)->getMemOperand());
14835     }
14836 
14837     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14838     // So it can increase the chance of CSE constant construction.
14839     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14840         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14841       // Need to sign-extended to 64-bits to handle negative values.
14842       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14843       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14844                                     MemVT.getSizeInBits());
14845       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14846 
14847       // DAG.getTruncStore() can't be used here because it doesn't accept
14848       // the general (base + offset) addressing mode.
14849       // So we use UpdateNodeOperands and setTruncatingStore instead.
14850       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14851                              N->getOperand(3));
14852       cast<StoreSDNode>(N)->setTruncatingStore(true);
14853       return SDValue(N, 0);
14854     }
14855 
14856     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14857     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14858     if (Op1VT.isSimple()) {
14859       MVT StoreVT = Op1VT.getSimpleVT();
14860       if (Subtarget.needsSwapsForVSXMemOps() &&
14861           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14862            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14863         return expandVSXStoreForLE(N, DCI);
14864     }
14865     break;
14866   }
14867   case ISD::LOAD: {
14868     LoadSDNode *LD = cast<LoadSDNode>(N);
14869     EVT VT = LD->getValueType(0);
14870 
14871     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14872     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14873     if (VT.isSimple()) {
14874       MVT LoadVT = VT.getSimpleVT();
14875       if (Subtarget.needsSwapsForVSXMemOps() &&
14876           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14877            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14878         return expandVSXLoadForLE(N, DCI);
14879     }
14880 
14881     // We sometimes end up with a 64-bit integer load, from which we extract
14882     // two single-precision floating-point numbers. This happens with
14883     // std::complex<float>, and other similar structures, because of the way we
14884     // canonicalize structure copies. However, if we lack direct moves,
14885     // then the final bitcasts from the extracted integer values to the
14886     // floating-point numbers turn into store/load pairs. Even with direct moves,
14887     // just loading the two floating-point numbers is likely better.
14888     auto ReplaceTwoFloatLoad = [&]() {
14889       if (VT != MVT::i64)
14890         return false;
14891 
14892       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14893           LD->isVolatile())
14894         return false;
14895 
14896       //  We're looking for a sequence like this:
14897       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14898       //      t16: i64 = srl t13, Constant:i32<32>
14899       //    t17: i32 = truncate t16
14900       //  t18: f32 = bitcast t17
14901       //    t19: i32 = truncate t13
14902       //  t20: f32 = bitcast t19
14903 
14904       if (!LD->hasNUsesOfValue(2, 0))
14905         return false;
14906 
14907       auto UI = LD->use_begin();
14908       while (UI.getUse().getResNo() != 0) ++UI;
14909       SDNode *Trunc = *UI++;
14910       while (UI.getUse().getResNo() != 0) ++UI;
14911       SDNode *RightShift = *UI;
14912       if (Trunc->getOpcode() != ISD::TRUNCATE)
14913         std::swap(Trunc, RightShift);
14914 
14915       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14916           Trunc->getValueType(0) != MVT::i32 ||
14917           !Trunc->hasOneUse())
14918         return false;
14919       if (RightShift->getOpcode() != ISD::SRL ||
14920           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14921           RightShift->getConstantOperandVal(1) != 32 ||
14922           !RightShift->hasOneUse())
14923         return false;
14924 
14925       SDNode *Trunc2 = *RightShift->use_begin();
14926       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14927           Trunc2->getValueType(0) != MVT::i32 ||
14928           !Trunc2->hasOneUse())
14929         return false;
14930 
14931       SDNode *Bitcast = *Trunc->use_begin();
14932       SDNode *Bitcast2 = *Trunc2->use_begin();
14933 
14934       if (Bitcast->getOpcode() != ISD::BITCAST ||
14935           Bitcast->getValueType(0) != MVT::f32)
14936         return false;
14937       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14938           Bitcast2->getValueType(0) != MVT::f32)
14939         return false;
14940 
14941       if (Subtarget.isLittleEndian())
14942         std::swap(Bitcast, Bitcast2);
14943 
14944       // Bitcast has the second float (in memory-layout order) and Bitcast2
14945       // has the first one.
14946 
14947       SDValue BasePtr = LD->getBasePtr();
14948       if (LD->isIndexed()) {
14949         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14950                "Non-pre-inc AM on PPC?");
14951         BasePtr =
14952           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14953                       LD->getOffset());
14954       }
14955 
14956       auto MMOFlags =
14957           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14958       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14959                                       LD->getPointerInfo(), LD->getAlignment(),
14960                                       MMOFlags, LD->getAAInfo());
14961       SDValue AddPtr =
14962         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14963                     BasePtr, DAG.getIntPtrConstant(4, dl));
14964       SDValue FloatLoad2 = DAG.getLoad(
14965           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14966           LD->getPointerInfo().getWithOffset(4),
14967           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14968 
14969       if (LD->isIndexed()) {
14970         // Note that DAGCombine should re-form any pre-increment load(s) from
14971         // what is produced here if that makes sense.
14972         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14973       }
14974 
14975       DCI.CombineTo(Bitcast2, FloatLoad);
14976       DCI.CombineTo(Bitcast, FloatLoad2);
14977 
14978       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14979                                     SDValue(FloatLoad2.getNode(), 1));
14980       return true;
14981     };
14982 
14983     if (ReplaceTwoFloatLoad())
14984       return SDValue(N, 0);
14985 
14986     EVT MemVT = LD->getMemoryVT();
14987     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14988     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14989     if (LD->isUnindexed() && VT.isVector() &&
14990         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14991           // P8 and later hardware should just use LOAD.
14992           !Subtarget.hasP8Vector() &&
14993           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14994            VT == MVT::v4f32))) &&
14995         LD->getAlign() < ABIAlignment) {
14996       // This is a type-legal unaligned Altivec load.
14997       SDValue Chain = LD->getChain();
14998       SDValue Ptr = LD->getBasePtr();
14999       bool isLittleEndian = Subtarget.isLittleEndian();
15000 
15001       // This implements the loading of unaligned vectors as described in
15002       // the venerable Apple Velocity Engine overview. Specifically:
15003       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15004       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15005       //
15006       // The general idea is to expand a sequence of one or more unaligned
15007       // loads into an alignment-based permutation-control instruction (lvsl
15008       // or lvsr), a series of regular vector loads (which always truncate
15009       // their input address to an aligned address), and a series of
15010       // permutations.  The results of these permutations are the requested
15011       // loaded values.  The trick is that the last "extra" load is not taken
15012       // from the address you might suspect (sizeof(vector) bytes after the
15013       // last requested load), but rather sizeof(vector) - 1 bytes after the
15014       // last requested vector. The point of this is to avoid a page fault if
15015       // the base address happened to be aligned. This works because if the
15016       // base address is aligned, then adding less than a full vector length
15017       // will cause the last vector in the sequence to be (re)loaded.
15018       // Otherwise, the next vector will be fetched as you might suspect was
15019       // necessary.
15020 
15021       // We might be able to reuse the permutation generation from
15022       // a different base address offset from this one by an aligned amount.
15023       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15024       // optimization later.
15025       Intrinsic::ID Intr, IntrLD, IntrPerm;
15026       MVT PermCntlTy, PermTy, LDTy;
15027       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15028                             : Intrinsic::ppc_altivec_lvsl;
15029       IntrLD = Intrinsic::ppc_altivec_lvx;
15030       IntrPerm = Intrinsic::ppc_altivec_vperm;
15031       PermCntlTy = MVT::v16i8;
15032       PermTy = MVT::v4i32;
15033       LDTy = MVT::v4i32;
15034 
15035       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15036 
15037       // Create the new MMO for the new base load. It is like the original MMO,
15038       // but represents an area in memory almost twice the vector size centered
15039       // on the original address. If the address is unaligned, we might start
15040       // reading up to (sizeof(vector)-1) bytes below the address of the
15041       // original unaligned load.
15042       MachineFunction &MF = DAG.getMachineFunction();
15043       MachineMemOperand *BaseMMO =
15044         MF.getMachineMemOperand(LD->getMemOperand(),
15045                                 -(long)MemVT.getStoreSize()+1,
15046                                 2*MemVT.getStoreSize()-1);
15047 
15048       // Create the new base load.
15049       SDValue LDXIntID =
15050           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15051       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15052       SDValue BaseLoad =
15053         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15054                                 DAG.getVTList(PermTy, MVT::Other),
15055                                 BaseLoadOps, LDTy, BaseMMO);
15056 
15057       // Note that the value of IncOffset (which is provided to the next
15058       // load's pointer info offset value, and thus used to calculate the
15059       // alignment), and the value of IncValue (which is actually used to
15060       // increment the pointer value) are different! This is because we
15061       // require the next load to appear to be aligned, even though it
15062       // is actually offset from the base pointer by a lesser amount.
15063       int IncOffset = VT.getSizeInBits() / 8;
15064       int IncValue = IncOffset;
15065 
15066       // Walk (both up and down) the chain looking for another load at the real
15067       // (aligned) offset (the alignment of the other load does not matter in
15068       // this case). If found, then do not use the offset reduction trick, as
15069       // that will prevent the loads from being later combined (as they would
15070       // otherwise be duplicates).
15071       if (!findConsecutiveLoad(LD, DAG))
15072         --IncValue;
15073 
15074       SDValue Increment =
15075           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15076       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15077 
15078       MachineMemOperand *ExtraMMO =
15079         MF.getMachineMemOperand(LD->getMemOperand(),
15080                                 1, 2*MemVT.getStoreSize()-1);
15081       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15082       SDValue ExtraLoad =
15083         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15084                                 DAG.getVTList(PermTy, MVT::Other),
15085                                 ExtraLoadOps, LDTy, ExtraMMO);
15086 
15087       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15088         BaseLoad.getValue(1), ExtraLoad.getValue(1));
15089 
15090       // Because vperm has a big-endian bias, we must reverse the order
15091       // of the input vectors and complement the permute control vector
15092       // when generating little endian code.  We have already handled the
15093       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15094       // and ExtraLoad here.
15095       SDValue Perm;
15096       if (isLittleEndian)
15097         Perm = BuildIntrinsicOp(IntrPerm,
15098                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15099       else
15100         Perm = BuildIntrinsicOp(IntrPerm,
15101                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15102 
15103       if (VT != PermTy)
15104         Perm = Subtarget.hasAltivec()
15105                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15106                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15107                                  DAG.getTargetConstant(1, dl, MVT::i64));
15108                                // second argument is 1 because this rounding
15109                                // is always exact.
15110 
15111       // The output of the permutation is our loaded result, the TokenFactor is
15112       // our new chain.
15113       DCI.CombineTo(N, Perm, TF);
15114       return SDValue(N, 0);
15115     }
15116     }
15117     break;
15118     case ISD::INTRINSIC_WO_CHAIN: {
15119       bool isLittleEndian = Subtarget.isLittleEndian();
15120       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15121       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15122                                            : Intrinsic::ppc_altivec_lvsl);
15123       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15124         SDValue Add = N->getOperand(1);
15125 
15126         int Bits = 4 /* 16 byte alignment */;
15127 
15128         if (DAG.MaskedValueIsZero(Add->getOperand(1),
15129                                   APInt::getAllOnesValue(Bits /* alignment */)
15130                                       .zext(Add.getScalarValueSizeInBits()))) {
15131           SDNode *BasePtr = Add->getOperand(0).getNode();
15132           for (SDNode::use_iterator UI = BasePtr->use_begin(),
15133                                     UE = BasePtr->use_end();
15134                UI != UE; ++UI) {
15135             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15136                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
15137                     IID) {
15138               // We've found another LVSL/LVSR, and this address is an aligned
15139               // multiple of that one. The results will be the same, so use the
15140               // one we've just found instead.
15141 
15142               return SDValue(*UI, 0);
15143             }
15144           }
15145         }
15146 
15147         if (isa<ConstantSDNode>(Add->getOperand(1))) {
15148           SDNode *BasePtr = Add->getOperand(0).getNode();
15149           for (SDNode::use_iterator UI = BasePtr->use_begin(),
15150                UE = BasePtr->use_end(); UI != UE; ++UI) {
15151             if (UI->getOpcode() == ISD::ADD &&
15152                 isa<ConstantSDNode>(UI->getOperand(1)) &&
15153                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15154                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
15155                 (1ULL << Bits) == 0) {
15156               SDNode *OtherAdd = *UI;
15157               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
15158                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
15159                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15160                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
15161                   return SDValue(*VI, 0);
15162                 }
15163               }
15164             }
15165           }
15166         }
15167       }
15168 
15169       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15170       // Expose the vabsduw/h/b opportunity for down stream
15171       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15172           (IID == Intrinsic::ppc_altivec_vmaxsw ||
15173            IID == Intrinsic::ppc_altivec_vmaxsh ||
15174            IID == Intrinsic::ppc_altivec_vmaxsb)) {
15175         SDValue V1 = N->getOperand(1);
15176         SDValue V2 = N->getOperand(2);
15177         if ((V1.getSimpleValueType() == MVT::v4i32 ||
15178              V1.getSimpleValueType() == MVT::v8i16 ||
15179              V1.getSimpleValueType() == MVT::v16i8) &&
15180             V1.getSimpleValueType() == V2.getSimpleValueType()) {
15181           // (0-a, a)
15182           if (V1.getOpcode() == ISD::SUB &&
15183               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15184               V1.getOperand(1) == V2) {
15185             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15186           }
15187           // (a, 0-a)
15188           if (V2.getOpcode() == ISD::SUB &&
15189               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15190               V2.getOperand(1) == V1) {
15191             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15192           }
15193           // (x-y, y-x)
15194           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15195               V1.getOperand(0) == V2.getOperand(1) &&
15196               V1.getOperand(1) == V2.getOperand(0)) {
15197             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15198           }
15199         }
15200       }
15201     }
15202 
15203     break;
15204   case ISD::INTRINSIC_W_CHAIN:
15205     // For little endian, VSX loads require generating lxvd2x/xxswapd.
15206     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15207     if (Subtarget.needsSwapsForVSXMemOps()) {
15208       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15209       default:
15210         break;
15211       case Intrinsic::ppc_vsx_lxvw4x:
15212       case Intrinsic::ppc_vsx_lxvd2x:
15213         return expandVSXLoadForLE(N, DCI);
15214       }
15215     }
15216     break;
15217   case ISD::INTRINSIC_VOID:
15218     // For little endian, VSX stores require generating xxswapd/stxvd2x.
15219     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15220     if (Subtarget.needsSwapsForVSXMemOps()) {
15221       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15222       default:
15223         break;
15224       case Intrinsic::ppc_vsx_stxvw4x:
15225       case Intrinsic::ppc_vsx_stxvd2x:
15226         return expandVSXStoreForLE(N, DCI);
15227       }
15228     }
15229     break;
15230   case ISD::BSWAP:
15231     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15232     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
15233         N->getOperand(0).hasOneUse() &&
15234         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15235          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
15236           N->getValueType(0) == MVT::i64))) {
15237       SDValue Load = N->getOperand(0);
15238       LoadSDNode *LD = cast<LoadSDNode>(Load);
15239       // Create the byte-swapping load.
15240       SDValue Ops[] = {
15241         LD->getChain(),    // Chain
15242         LD->getBasePtr(),  // Ptr
15243         DAG.getValueType(N->getValueType(0)) // VT
15244       };
15245       SDValue BSLoad =
15246         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
15247                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15248                                               MVT::i64 : MVT::i32, MVT::Other),
15249                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
15250 
15251       // If this is an i16 load, insert the truncate.
15252       SDValue ResVal = BSLoad;
15253       if (N->getValueType(0) == MVT::i16)
15254         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15255 
15256       // First, combine the bswap away.  This makes the value produced by the
15257       // load dead.
15258       DCI.CombineTo(N, ResVal);
15259 
15260       // Next, combine the load away, we give it a bogus result value but a real
15261       // chain result.  The result value is dead because the bswap is dead.
15262       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15263 
15264       // Return N so it doesn't get rechecked!
15265       return SDValue(N, 0);
15266     }
15267     break;
15268   case PPCISD::VCMP:
15269     // If a VCMP_rec node already exists with exactly the same operands as this
15270     // node, use its result instead of this node (VCMP_rec computes both a CR6
15271     // and a normal output).
15272     //
15273     if (!N->getOperand(0).hasOneUse() &&
15274         !N->getOperand(1).hasOneUse() &&
15275         !N->getOperand(2).hasOneUse()) {
15276 
15277       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15278       SDNode *VCMPrecNode = nullptr;
15279 
15280       SDNode *LHSN = N->getOperand(0).getNode();
15281       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15282            UI != E; ++UI)
15283         if (UI->getOpcode() == PPCISD::VCMP_rec &&
15284             UI->getOperand(1) == N->getOperand(1) &&
15285             UI->getOperand(2) == N->getOperand(2) &&
15286             UI->getOperand(0) == N->getOperand(0)) {
15287           VCMPrecNode = *UI;
15288           break;
15289         }
15290 
15291       // If there is no VCMP_rec node, or if the flag value has a single use,
15292       // don't transform this.
15293       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15294         break;
15295 
15296       // Look at the (necessarily single) use of the flag value.  If it has a
15297       // chain, this transformation is more complex.  Note that multiple things
15298       // could use the value result, which we should ignore.
15299       SDNode *FlagUser = nullptr;
15300       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15301            FlagUser == nullptr; ++UI) {
15302         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15303         SDNode *User = *UI;
15304         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15305           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15306             FlagUser = User;
15307             break;
15308           }
15309         }
15310       }
15311 
15312       // If the user is a MFOCRF instruction, we know this is safe.
15313       // Otherwise we give up for right now.
15314       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15315         return SDValue(VCMPrecNode, 0);
15316     }
15317     break;
15318   case ISD::BRCOND: {
15319     SDValue Cond = N->getOperand(1);
15320     SDValue Target = N->getOperand(2);
15321 
15322     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15323         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15324           Intrinsic::loop_decrement) {
15325 
15326       // We now need to make the intrinsic dead (it cannot be instruction
15327       // selected).
15328       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15329       assert(Cond.getNode()->hasOneUse() &&
15330              "Counter decrement has more than one use");
15331 
15332       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15333                          N->getOperand(0), Target);
15334     }
15335   }
15336   break;
15337   case ISD::BR_CC: {
15338     // If this is a branch on an altivec predicate comparison, lower this so
15339     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
15340     // lowering is done pre-legalize, because the legalizer lowers the predicate
15341     // compare down to code that is difficult to reassemble.
15342     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15343     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15344 
15345     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15346     // value. If so, pass-through the AND to get to the intrinsic.
15347     if (LHS.getOpcode() == ISD::AND &&
15348         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15349         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15350           Intrinsic::loop_decrement &&
15351         isa<ConstantSDNode>(LHS.getOperand(1)) &&
15352         !isNullConstant(LHS.getOperand(1)))
15353       LHS = LHS.getOperand(0);
15354 
15355     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15356         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15357           Intrinsic::loop_decrement &&
15358         isa<ConstantSDNode>(RHS)) {
15359       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15360              "Counter decrement comparison is not EQ or NE");
15361 
15362       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15363       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15364                     (CC == ISD::SETNE && !Val);
15365 
15366       // We now need to make the intrinsic dead (it cannot be instruction
15367       // selected).
15368       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15369       assert(LHS.getNode()->hasOneUse() &&
15370              "Counter decrement has more than one use");
15371 
15372       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15373                          N->getOperand(0), N->getOperand(4));
15374     }
15375 
15376     int CompareOpc;
15377     bool isDot;
15378 
15379     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15380         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15381         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15382       assert(isDot && "Can't compare against a vector result!");
15383 
15384       // If this is a comparison against something other than 0/1, then we know
15385       // that the condition is never/always true.
15386       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15387       if (Val != 0 && Val != 1) {
15388         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
15389           return N->getOperand(0);
15390         // Always !=, turn it into an unconditional branch.
15391         return DAG.getNode(ISD::BR, dl, MVT::Other,
15392                            N->getOperand(0), N->getOperand(4));
15393       }
15394 
15395       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15396 
15397       // Create the PPCISD altivec 'dot' comparison node.
15398       SDValue Ops[] = {
15399         LHS.getOperand(2),  // LHS of compare
15400         LHS.getOperand(3),  // RHS of compare
15401         DAG.getConstant(CompareOpc, dl, MVT::i32)
15402       };
15403       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15404       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15405 
15406       // Unpack the result based on how the target uses it.
15407       PPC::Predicate CompOpc;
15408       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15409       default:  // Can't happen, don't crash on invalid number though.
15410       case 0:   // Branch on the value of the EQ bit of CR6.
15411         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15412         break;
15413       case 1:   // Branch on the inverted value of the EQ bit of CR6.
15414         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15415         break;
15416       case 2:   // Branch on the value of the LT bit of CR6.
15417         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15418         break;
15419       case 3:   // Branch on the inverted value of the LT bit of CR6.
15420         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15421         break;
15422       }
15423 
15424       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15425                          DAG.getConstant(CompOpc, dl, MVT::i32),
15426                          DAG.getRegister(PPC::CR6, MVT::i32),
15427                          N->getOperand(4), CompNode.getValue(1));
15428     }
15429     break;
15430   }
15431   case ISD::BUILD_VECTOR:
15432     return DAGCombineBuildVector(N, DCI);
15433   case ISD::ABS:
15434     return combineABS(N, DCI);
15435   case ISD::VSELECT:
15436     return combineVSelect(N, DCI);
15437   }
15438 
15439   return SDValue();
15440 }
15441 
15442 SDValue
15443 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
15444                                  SelectionDAG &DAG,
15445                                  SmallVectorImpl<SDNode *> &Created) const {
15446   // fold (sdiv X, pow2)
15447   EVT VT = N->getValueType(0);
15448   if (VT == MVT::i64 && !Subtarget.isPPC64())
15449     return SDValue();
15450   if ((VT != MVT::i32 && VT != MVT::i64) ||
15451       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
15452     return SDValue();
15453 
15454   SDLoc DL(N);
15455   SDValue N0 = N->getOperand(0);
15456 
15457   bool IsNegPow2 = (-Divisor).isPowerOf2();
15458   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
15459   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
15460 
15461   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
15462   Created.push_back(Op.getNode());
15463 
15464   if (IsNegPow2) {
15465     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
15466     Created.push_back(Op.getNode());
15467   }
15468 
15469   return Op;
15470 }
15471 
15472 //===----------------------------------------------------------------------===//
15473 // Inline Assembly Support
15474 //===----------------------------------------------------------------------===//
15475 
15476 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15477                                                       KnownBits &Known,
15478                                                       const APInt &DemandedElts,
15479                                                       const SelectionDAG &DAG,
15480                                                       unsigned Depth) const {
15481   Known.resetAll();
15482   switch (Op.getOpcode()) {
15483   default: break;
15484   case PPCISD::LBRX: {
15485     // lhbrx is known to have the top bits cleared out.
15486     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
15487       Known.Zero = 0xFFFF0000;
15488     break;
15489   }
15490   case ISD::INTRINSIC_WO_CHAIN: {
15491     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
15492     default: break;
15493     case Intrinsic::ppc_altivec_vcmpbfp_p:
15494     case Intrinsic::ppc_altivec_vcmpeqfp_p:
15495     case Intrinsic::ppc_altivec_vcmpequb_p:
15496     case Intrinsic::ppc_altivec_vcmpequh_p:
15497     case Intrinsic::ppc_altivec_vcmpequw_p:
15498     case Intrinsic::ppc_altivec_vcmpequd_p:
15499     case Intrinsic::ppc_altivec_vcmpequq_p:
15500     case Intrinsic::ppc_altivec_vcmpgefp_p:
15501     case Intrinsic::ppc_altivec_vcmpgtfp_p:
15502     case Intrinsic::ppc_altivec_vcmpgtsb_p:
15503     case Intrinsic::ppc_altivec_vcmpgtsh_p:
15504     case Intrinsic::ppc_altivec_vcmpgtsw_p:
15505     case Intrinsic::ppc_altivec_vcmpgtsd_p:
15506     case Intrinsic::ppc_altivec_vcmpgtsq_p:
15507     case Intrinsic::ppc_altivec_vcmpgtub_p:
15508     case Intrinsic::ppc_altivec_vcmpgtuh_p:
15509     case Intrinsic::ppc_altivec_vcmpgtuw_p:
15510     case Intrinsic::ppc_altivec_vcmpgtud_p:
15511     case Intrinsic::ppc_altivec_vcmpgtuq_p:
15512       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
15513       break;
15514     }
15515   }
15516   }
15517 }
15518 
15519 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
15520   switch (Subtarget.getCPUDirective()) {
15521   default: break;
15522   case PPC::DIR_970:
15523   case PPC::DIR_PWR4:
15524   case PPC::DIR_PWR5:
15525   case PPC::DIR_PWR5X:
15526   case PPC::DIR_PWR6:
15527   case PPC::DIR_PWR6X:
15528   case PPC::DIR_PWR7:
15529   case PPC::DIR_PWR8:
15530   case PPC::DIR_PWR9:
15531   case PPC::DIR_PWR10:
15532   case PPC::DIR_PWR_FUTURE: {
15533     if (!ML)
15534       break;
15535 
15536     if (!DisableInnermostLoopAlign32) {
15537       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
15538       // so that we can decrease cache misses and branch-prediction misses.
15539       // Actual alignment of the loop will depend on the hotness check and other
15540       // logic in alignBlocks.
15541       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
15542         return Align(32);
15543     }
15544 
15545     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
15546 
15547     // For small loops (between 5 and 8 instructions), align to a 32-byte
15548     // boundary so that the entire loop fits in one instruction-cache line.
15549     uint64_t LoopSize = 0;
15550     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
15551       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
15552         LoopSize += TII->getInstSizeInBytes(*J);
15553         if (LoopSize > 32)
15554           break;
15555       }
15556 
15557     if (LoopSize > 16 && LoopSize <= 32)
15558       return Align(32);
15559 
15560     break;
15561   }
15562   }
15563 
15564   return TargetLowering::getPrefLoopAlignment(ML);
15565 }
15566 
15567 /// getConstraintType - Given a constraint, return the type of
15568 /// constraint it is for this target.
15569 PPCTargetLowering::ConstraintType
15570 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15571   if (Constraint.size() == 1) {
15572     switch (Constraint[0]) {
15573     default: break;
15574     case 'b':
15575     case 'r':
15576     case 'f':
15577     case 'd':
15578     case 'v':
15579     case 'y':
15580       return C_RegisterClass;
15581     case 'Z':
15582       // FIXME: While Z does indicate a memory constraint, it specifically
15583       // indicates an r+r address (used in conjunction with the 'y' modifier
15584       // in the replacement string). Currently, we're forcing the base
15585       // register to be r0 in the asm printer (which is interpreted as zero)
15586       // and forming the complete address in the second register. This is
15587       // suboptimal.
15588       return C_Memory;
15589     }
15590   } else if (Constraint == "wc") { // individual CR bits.
15591     return C_RegisterClass;
15592   } else if (Constraint == "wa" || Constraint == "wd" ||
15593              Constraint == "wf" || Constraint == "ws" ||
15594              Constraint == "wi" || Constraint == "ww") {
15595     return C_RegisterClass; // VSX registers.
15596   }
15597   return TargetLowering::getConstraintType(Constraint);
15598 }
15599 
15600 /// Examine constraint type and operand type and determine a weight value.
15601 /// This object must already have been set up with the operand type
15602 /// and the current alternative constraint selected.
15603 TargetLowering::ConstraintWeight
15604 PPCTargetLowering::getSingleConstraintMatchWeight(
15605     AsmOperandInfo &info, const char *constraint) const {
15606   ConstraintWeight weight = CW_Invalid;
15607   Value *CallOperandVal = info.CallOperandVal;
15608     // If we don't have a value, we can't do a match,
15609     // but allow it at the lowest weight.
15610   if (!CallOperandVal)
15611     return CW_Default;
15612   Type *type = CallOperandVal->getType();
15613 
15614   // Look at the constraint type.
15615   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15616     return CW_Register; // an individual CR bit.
15617   else if ((StringRef(constraint) == "wa" ||
15618             StringRef(constraint) == "wd" ||
15619             StringRef(constraint) == "wf") &&
15620            type->isVectorTy())
15621     return CW_Register;
15622   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15623     return CW_Register; // just hold 64-bit integers data.
15624   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15625     return CW_Register;
15626   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15627     return CW_Register;
15628 
15629   switch (*constraint) {
15630   default:
15631     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15632     break;
15633   case 'b':
15634     if (type->isIntegerTy())
15635       weight = CW_Register;
15636     break;
15637   case 'f':
15638     if (type->isFloatTy())
15639       weight = CW_Register;
15640     break;
15641   case 'd':
15642     if (type->isDoubleTy())
15643       weight = CW_Register;
15644     break;
15645   case 'v':
15646     if (type->isVectorTy())
15647       weight = CW_Register;
15648     break;
15649   case 'y':
15650     weight = CW_Register;
15651     break;
15652   case 'Z':
15653     weight = CW_Memory;
15654     break;
15655   }
15656   return weight;
15657 }
15658 
15659 std::pair<unsigned, const TargetRegisterClass *>
15660 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15661                                                 StringRef Constraint,
15662                                                 MVT VT) const {
15663   if (Constraint.size() == 1) {
15664     // GCC RS6000 Constraint Letters
15665     switch (Constraint[0]) {
15666     case 'b':   // R1-R31
15667       if (VT == MVT::i64 && Subtarget.isPPC64())
15668         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15669       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15670     case 'r':   // R0-R31
15671       if (VT == MVT::i64 && Subtarget.isPPC64())
15672         return std::make_pair(0U, &PPC::G8RCRegClass);
15673       return std::make_pair(0U, &PPC::GPRCRegClass);
15674     // 'd' and 'f' constraints are both defined to be "the floating point
15675     // registers", where one is for 32-bit and the other for 64-bit. We don't
15676     // really care overly much here so just give them all the same reg classes.
15677     case 'd':
15678     case 'f':
15679       if (Subtarget.hasSPE()) {
15680         if (VT == MVT::f32 || VT == MVT::i32)
15681           return std::make_pair(0U, &PPC::GPRCRegClass);
15682         if (VT == MVT::f64 || VT == MVT::i64)
15683           return std::make_pair(0U, &PPC::SPERCRegClass);
15684       } else {
15685         if (VT == MVT::f32 || VT == MVT::i32)
15686           return std::make_pair(0U, &PPC::F4RCRegClass);
15687         if (VT == MVT::f64 || VT == MVT::i64)
15688           return std::make_pair(0U, &PPC::F8RCRegClass);
15689       }
15690       break;
15691     case 'v':
15692       if (Subtarget.hasAltivec())
15693         return std::make_pair(0U, &PPC::VRRCRegClass);
15694       break;
15695     case 'y':   // crrc
15696       return std::make_pair(0U, &PPC::CRRCRegClass);
15697     }
15698   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15699     // An individual CR bit.
15700     return std::make_pair(0U, &PPC::CRBITRCRegClass);
15701   } else if ((Constraint == "wa" || Constraint == "wd" ||
15702              Constraint == "wf" || Constraint == "wi") &&
15703              Subtarget.hasVSX()) {
15704     return std::make_pair(0U, &PPC::VSRCRegClass);
15705   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15706     if (VT == MVT::f32 && Subtarget.hasP8Vector())
15707       return std::make_pair(0U, &PPC::VSSRCRegClass);
15708     else
15709       return std::make_pair(0U, &PPC::VSFRCRegClass);
15710   }
15711 
15712   // If we name a VSX register, we can't defer to the base class because it
15713   // will not recognize the correct register (their names will be VSL{0-31}
15714   // and V{0-31} so they won't match). So we match them here.
15715   if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15716     int VSNum = atoi(Constraint.data() + 3);
15717     assert(VSNum >= 0 && VSNum <= 63 &&
15718            "Attempted to access a vsr out of range");
15719     if (VSNum < 32)
15720       return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15721     return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15722   }
15723   std::pair<unsigned, const TargetRegisterClass *> R =
15724       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15725 
15726   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15727   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15728   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15729   // register.
15730   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15731   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15732   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15733       PPC::GPRCRegClass.contains(R.first))
15734     return std::make_pair(TRI->getMatchingSuperReg(R.first,
15735                             PPC::sub_32, &PPC::G8RCRegClass),
15736                           &PPC::G8RCRegClass);
15737 
15738   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15739   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15740     R.first = PPC::CR0;
15741     R.second = &PPC::CRRCRegClass;
15742   }
15743 
15744   return R;
15745 }
15746 
15747 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15748 /// vector.  If it is invalid, don't add anything to Ops.
15749 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15750                                                      std::string &Constraint,
15751                                                      std::vector<SDValue>&Ops,
15752                                                      SelectionDAG &DAG) const {
15753   SDValue Result;
15754 
15755   // Only support length 1 constraints.
15756   if (Constraint.length() > 1) return;
15757 
15758   char Letter = Constraint[0];
15759   switch (Letter) {
15760   default: break;
15761   case 'I':
15762   case 'J':
15763   case 'K':
15764   case 'L':
15765   case 'M':
15766   case 'N':
15767   case 'O':
15768   case 'P': {
15769     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15770     if (!CST) return; // Must be an immediate to match.
15771     SDLoc dl(Op);
15772     int64_t Value = CST->getSExtValue();
15773     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15774                          // numbers are printed as such.
15775     switch (Letter) {
15776     default: llvm_unreachable("Unknown constraint letter!");
15777     case 'I':  // "I" is a signed 16-bit constant.
15778       if (isInt<16>(Value))
15779         Result = DAG.getTargetConstant(Value, dl, TCVT);
15780       break;
15781     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15782       if (isShiftedUInt<16, 16>(Value))
15783         Result = DAG.getTargetConstant(Value, dl, TCVT);
15784       break;
15785     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15786       if (isShiftedInt<16, 16>(Value))
15787         Result = DAG.getTargetConstant(Value, dl, TCVT);
15788       break;
15789     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15790       if (isUInt<16>(Value))
15791         Result = DAG.getTargetConstant(Value, dl, TCVT);
15792       break;
15793     case 'M':  // "M" is a constant that is greater than 31.
15794       if (Value > 31)
15795         Result = DAG.getTargetConstant(Value, dl, TCVT);
15796       break;
15797     case 'N':  // "N" is a positive constant that is an exact power of two.
15798       if (Value > 0 && isPowerOf2_64(Value))
15799         Result = DAG.getTargetConstant(Value, dl, TCVT);
15800       break;
15801     case 'O':  // "O" is the constant zero.
15802       if (Value == 0)
15803         Result = DAG.getTargetConstant(Value, dl, TCVT);
15804       break;
15805     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15806       if (isInt<16>(-Value))
15807         Result = DAG.getTargetConstant(Value, dl, TCVT);
15808       break;
15809     }
15810     break;
15811   }
15812   }
15813 
15814   if (Result.getNode()) {
15815     Ops.push_back(Result);
15816     return;
15817   }
15818 
15819   // Handle standard constraint letters.
15820   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15821 }
15822 
15823 // isLegalAddressingMode - Return true if the addressing mode represented
15824 // by AM is legal for this target, for a load/store of the specified type.
15825 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15826                                               const AddrMode &AM, Type *Ty,
15827                                               unsigned AS,
15828                                               Instruction *I) const {
15829   // Vector type r+i form is supported since power9 as DQ form. We don't check
15830   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15831   // imm form is preferred and the offset can be adjusted to use imm form later
15832   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15833   // max offset to check legal addressing mode, we should be a little aggressive
15834   // to contain other offsets for that LSRUse.
15835   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15836     return false;
15837 
15838   // PPC allows a sign-extended 16-bit immediate field.
15839   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15840     return false;
15841 
15842   // No global is ever allowed as a base.
15843   if (AM.BaseGV)
15844     return false;
15845 
15846   // PPC only support r+r,
15847   switch (AM.Scale) {
15848   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15849     break;
15850   case 1:
15851     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15852       return false;
15853     // Otherwise we have r+r or r+i.
15854     break;
15855   case 2:
15856     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15857       return false;
15858     // Allow 2*r as r+r.
15859     break;
15860   default:
15861     // No other scales are supported.
15862     return false;
15863   }
15864 
15865   return true;
15866 }
15867 
15868 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15869                                            SelectionDAG &DAG) const {
15870   MachineFunction &MF = DAG.getMachineFunction();
15871   MachineFrameInfo &MFI = MF.getFrameInfo();
15872   MFI.setReturnAddressIsTaken(true);
15873 
15874   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15875     return SDValue();
15876 
15877   SDLoc dl(Op);
15878   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15879 
15880   // Make sure the function does not optimize away the store of the RA to
15881   // the stack.
15882   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15883   FuncInfo->setLRStoreRequired();
15884   bool isPPC64 = Subtarget.isPPC64();
15885   auto PtrVT = getPointerTy(MF.getDataLayout());
15886 
15887   if (Depth > 0) {
15888     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15889     SDValue Offset =
15890         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15891                         isPPC64 ? MVT::i64 : MVT::i32);
15892     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15893                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15894                        MachinePointerInfo());
15895   }
15896 
15897   // Just load the return address off the stack.
15898   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15899   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15900                      MachinePointerInfo());
15901 }
15902 
15903 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15904                                           SelectionDAG &DAG) const {
15905   SDLoc dl(Op);
15906   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15907 
15908   MachineFunction &MF = DAG.getMachineFunction();
15909   MachineFrameInfo &MFI = MF.getFrameInfo();
15910   MFI.setFrameAddressIsTaken(true);
15911 
15912   EVT PtrVT = getPointerTy(MF.getDataLayout());
15913   bool isPPC64 = PtrVT == MVT::i64;
15914 
15915   // Naked functions never have a frame pointer, and so we use r1. For all
15916   // other functions, this decision must be delayed until during PEI.
15917   unsigned FrameReg;
15918   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15919     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15920   else
15921     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15922 
15923   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15924                                          PtrVT);
15925   while (Depth--)
15926     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15927                             FrameAddr, MachinePointerInfo());
15928   return FrameAddr;
15929 }
15930 
15931 // FIXME? Maybe this could be a TableGen attribute on some registers and
15932 // this table could be generated automatically from RegInfo.
15933 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15934                                               const MachineFunction &MF) const {
15935   bool isPPC64 = Subtarget.isPPC64();
15936 
15937   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15938   if (!is64Bit && VT != LLT::scalar(32))
15939     report_fatal_error("Invalid register global variable type");
15940 
15941   Register Reg = StringSwitch<Register>(RegName)
15942                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15943                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15944                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15945                      .Default(Register());
15946 
15947   if (Reg)
15948     return Reg;
15949   report_fatal_error("Invalid register name global variable");
15950 }
15951 
15952 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15953   // 32-bit SVR4 ABI access everything as got-indirect.
15954   if (Subtarget.is32BitELFABI())
15955     return true;
15956 
15957   // AIX accesses everything indirectly through the TOC, which is similar to
15958   // the GOT.
15959   if (Subtarget.isAIXABI())
15960     return true;
15961 
15962   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15963   // If it is small or large code model, module locals are accessed
15964   // indirectly by loading their address from .toc/.got.
15965   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15966     return true;
15967 
15968   // JumpTable and BlockAddress are accessed as got-indirect.
15969   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15970     return true;
15971 
15972   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15973     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15974 
15975   return false;
15976 }
15977 
15978 bool
15979 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15980   // The PowerPC target isn't yet aware of offsets.
15981   return false;
15982 }
15983 
15984 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15985                                            const CallInst &I,
15986                                            MachineFunction &MF,
15987                                            unsigned Intrinsic) const {
15988   switch (Intrinsic) {
15989   case Intrinsic::ppc_altivec_lvx:
15990   case Intrinsic::ppc_altivec_lvxl:
15991   case Intrinsic::ppc_altivec_lvebx:
15992   case Intrinsic::ppc_altivec_lvehx:
15993   case Intrinsic::ppc_altivec_lvewx:
15994   case Intrinsic::ppc_vsx_lxvd2x:
15995   case Intrinsic::ppc_vsx_lxvw4x:
15996   case Intrinsic::ppc_vsx_lxvd2x_be:
15997   case Intrinsic::ppc_vsx_lxvw4x_be:
15998   case Intrinsic::ppc_vsx_lxvl:
15999   case Intrinsic::ppc_vsx_lxvll: {
16000     EVT VT;
16001     switch (Intrinsic) {
16002     case Intrinsic::ppc_altivec_lvebx:
16003       VT = MVT::i8;
16004       break;
16005     case Intrinsic::ppc_altivec_lvehx:
16006       VT = MVT::i16;
16007       break;
16008     case Intrinsic::ppc_altivec_lvewx:
16009       VT = MVT::i32;
16010       break;
16011     case Intrinsic::ppc_vsx_lxvd2x:
16012     case Intrinsic::ppc_vsx_lxvd2x_be:
16013       VT = MVT::v2f64;
16014       break;
16015     default:
16016       VT = MVT::v4i32;
16017       break;
16018     }
16019 
16020     Info.opc = ISD::INTRINSIC_W_CHAIN;
16021     Info.memVT = VT;
16022     Info.ptrVal = I.getArgOperand(0);
16023     Info.offset = -VT.getStoreSize()+1;
16024     Info.size = 2*VT.getStoreSize()-1;
16025     Info.align = Align(1);
16026     Info.flags = MachineMemOperand::MOLoad;
16027     return true;
16028   }
16029   case Intrinsic::ppc_altivec_stvx:
16030   case Intrinsic::ppc_altivec_stvxl:
16031   case Intrinsic::ppc_altivec_stvebx:
16032   case Intrinsic::ppc_altivec_stvehx:
16033   case Intrinsic::ppc_altivec_stvewx:
16034   case Intrinsic::ppc_vsx_stxvd2x:
16035   case Intrinsic::ppc_vsx_stxvw4x:
16036   case Intrinsic::ppc_vsx_stxvd2x_be:
16037   case Intrinsic::ppc_vsx_stxvw4x_be:
16038   case Intrinsic::ppc_vsx_stxvl:
16039   case Intrinsic::ppc_vsx_stxvll: {
16040     EVT VT;
16041     switch (Intrinsic) {
16042     case Intrinsic::ppc_altivec_stvebx:
16043       VT = MVT::i8;
16044       break;
16045     case Intrinsic::ppc_altivec_stvehx:
16046       VT = MVT::i16;
16047       break;
16048     case Intrinsic::ppc_altivec_stvewx:
16049       VT = MVT::i32;
16050       break;
16051     case Intrinsic::ppc_vsx_stxvd2x:
16052     case Intrinsic::ppc_vsx_stxvd2x_be:
16053       VT = MVT::v2f64;
16054       break;
16055     default:
16056       VT = MVT::v4i32;
16057       break;
16058     }
16059 
16060     Info.opc = ISD::INTRINSIC_VOID;
16061     Info.memVT = VT;
16062     Info.ptrVal = I.getArgOperand(1);
16063     Info.offset = -VT.getStoreSize()+1;
16064     Info.size = 2*VT.getStoreSize()-1;
16065     Info.align = Align(1);
16066     Info.flags = MachineMemOperand::MOStore;
16067     return true;
16068   }
16069   default:
16070     break;
16071   }
16072 
16073   return false;
16074 }
16075 
16076 /// It returns EVT::Other if the type should be determined using generic
16077 /// target-independent logic.
16078 EVT PPCTargetLowering::getOptimalMemOpType(
16079     const MemOp &Op, const AttributeList &FuncAttributes) const {
16080   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16081     // We should use Altivec/VSX loads and stores when available. For unaligned
16082     // addresses, unaligned VSX loads are only fast starting with the P8.
16083     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16084         (Op.isAligned(Align(16)) ||
16085          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16086       return MVT::v4i32;
16087   }
16088 
16089   if (Subtarget.isPPC64()) {
16090     return MVT::i64;
16091   }
16092 
16093   return MVT::i32;
16094 }
16095 
16096 /// Returns true if it is beneficial to convert a load of a constant
16097 /// to just the constant itself.
16098 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
16099                                                           Type *Ty) const {
16100   assert(Ty->isIntegerTy());
16101 
16102   unsigned BitSize = Ty->getPrimitiveSizeInBits();
16103   return !(BitSize == 0 || BitSize > 64);
16104 }
16105 
16106 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
16107   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16108     return false;
16109   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16110   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16111   return NumBits1 == 64 && NumBits2 == 32;
16112 }
16113 
16114 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
16115   if (!VT1.isInteger() || !VT2.isInteger())
16116     return false;
16117   unsigned NumBits1 = VT1.getSizeInBits();
16118   unsigned NumBits2 = VT2.getSizeInBits();
16119   return NumBits1 == 64 && NumBits2 == 32;
16120 }
16121 
16122 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
16123   // Generally speaking, zexts are not free, but they are free when they can be
16124   // folded with other operations.
16125   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16126     EVT MemVT = LD->getMemoryVT();
16127     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16128          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16129         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16130          LD->getExtensionType() == ISD::ZEXTLOAD))
16131       return true;
16132   }
16133 
16134   // FIXME: Add other cases...
16135   //  - 32-bit shifts with a zext to i64
16136   //  - zext after ctlz, bswap, etc.
16137   //  - zext after and by a constant mask
16138 
16139   return TargetLowering::isZExtFree(Val, VT2);
16140 }
16141 
16142 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16143   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16144          "invalid fpext types");
16145   // Extending to float128 is not free.
16146   if (DestVT == MVT::f128)
16147     return false;
16148   return true;
16149 }
16150 
16151 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
16152   return isInt<16>(Imm) || isUInt<16>(Imm);
16153 }
16154 
16155 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
16156   return isInt<16>(Imm) || isUInt<16>(Imm);
16157 }
16158 
16159 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
16160                                                        unsigned,
16161                                                        unsigned,
16162                                                        MachineMemOperand::Flags,
16163                                                        bool *Fast) const {
16164   if (DisablePPCUnaligned)
16165     return false;
16166 
16167   // PowerPC supports unaligned memory access for simple non-vector types.
16168   // Although accessing unaligned addresses is not as efficient as accessing
16169   // aligned addresses, it is generally more efficient than manual expansion,
16170   // and generally only traps for software emulation when crossing page
16171   // boundaries.
16172 
16173   if (!VT.isSimple())
16174     return false;
16175 
16176   if (VT.isFloatingPoint() && !VT.isVector() &&
16177       !Subtarget.allowsUnalignedFPAccess())
16178     return false;
16179 
16180   if (VT.getSimpleVT().isVector()) {
16181     if (Subtarget.hasVSX()) {
16182       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16183           VT != MVT::v4f32 && VT != MVT::v4i32)
16184         return false;
16185     } else {
16186       return false;
16187     }
16188   }
16189 
16190   if (VT == MVT::ppcf128)
16191     return false;
16192 
16193   if (Fast)
16194     *Fast = true;
16195 
16196   return true;
16197 }
16198 
16199 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16200                                                SDValue C) const {
16201   // Check integral scalar types.
16202   if (!VT.isScalarInteger())
16203     return false;
16204   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16205     if (!ConstNode->getAPIntValue().isSignedIntN(64))
16206       return false;
16207     // This transformation will generate >= 2 operations. But the following
16208     // cases will generate <= 2 instructions during ISEL. So exclude them.
16209     // 1. If the constant multiplier fits 16 bits, it can be handled by one
16210     // HW instruction, ie. MULLI
16211     // 2. If the multiplier after shifted fits 16 bits, an extra shift
16212     // instruction is needed than case 1, ie. MULLI and RLDICR
16213     int64_t Imm = ConstNode->getSExtValue();
16214     unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16215     Imm >>= Shift;
16216     if (isInt<16>(Imm))
16217       return false;
16218     uint64_t UImm = static_cast<uint64_t>(Imm);
16219     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16220         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16221       return true;
16222   }
16223   return false;
16224 }
16225 
16226 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16227                                                    EVT VT) const {
16228   return isFMAFasterThanFMulAndFAdd(
16229       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16230 }
16231 
16232 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
16233                                                    Type *Ty) const {
16234   switch (Ty->getScalarType()->getTypeID()) {
16235   case Type::FloatTyID:
16236   case Type::DoubleTyID:
16237     return true;
16238   case Type::FP128TyID:
16239     return Subtarget.hasP9Vector();
16240   default:
16241     return false;
16242   }
16243 }
16244 
16245 // FIXME: add more patterns which are not profitable to hoist.
16246 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
16247   if (!I->hasOneUse())
16248     return true;
16249 
16250   Instruction *User = I->user_back();
16251   assert(User && "A single use instruction with no uses.");
16252 
16253   switch (I->getOpcode()) {
16254   case Instruction::FMul: {
16255     // Don't break FMA, PowerPC prefers FMA.
16256     if (User->getOpcode() != Instruction::FSub &&
16257         User->getOpcode() != Instruction::FAdd)
16258       return true;
16259 
16260     const TargetOptions &Options = getTargetMachine().Options;
16261     const Function *F = I->getFunction();
16262     const DataLayout &DL = F->getParent()->getDataLayout();
16263     Type *Ty = User->getOperand(0)->getType();
16264 
16265     return !(
16266         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16267         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
16268         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16269   }
16270   case Instruction::Load: {
16271     // Don't break "store (load float*)" pattern, this pattern will be combined
16272     // to "store (load int32)" in later InstCombine pass. See function
16273     // combineLoadToOperationType. On PowerPC, loading a float point takes more
16274     // cycles than loading a 32 bit integer.
16275     LoadInst *LI = cast<LoadInst>(I);
16276     // For the loads that combineLoadToOperationType does nothing, like
16277     // ordered load, it should be profitable to hoist them.
16278     // For swifterror load, it can only be used for pointer to pointer type, so
16279     // later type check should get rid of this case.
16280     if (!LI->isUnordered())
16281       return true;
16282 
16283     if (User->getOpcode() != Instruction::Store)
16284       return true;
16285 
16286     if (I->getType()->getTypeID() != Type::FloatTyID)
16287       return true;
16288 
16289     return false;
16290   }
16291   default:
16292     return true;
16293   }
16294   return true;
16295 }
16296 
16297 const MCPhysReg *
16298 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
16299   // LR is a callee-save register, but we must treat it as clobbered by any call
16300   // site. Hence we include LR in the scratch registers, which are in turn added
16301   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16302   // to CTR, which is used by any indirect call.
16303   static const MCPhysReg ScratchRegs[] = {
16304     PPC::X12, PPC::LR8, PPC::CTR8, 0
16305   };
16306 
16307   return ScratchRegs;
16308 }
16309 
16310 Register PPCTargetLowering::getExceptionPointerRegister(
16311     const Constant *PersonalityFn) const {
16312   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16313 }
16314 
16315 Register PPCTargetLowering::getExceptionSelectorRegister(
16316     const Constant *PersonalityFn) const {
16317   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16318 }
16319 
16320 bool
16321 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
16322                      EVT VT , unsigned DefinedValues) const {
16323   if (VT == MVT::v2i64)
16324     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16325 
16326   if (Subtarget.hasVSX())
16327     return true;
16328 
16329   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16330 }
16331 
16332 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
16333   if (DisableILPPref || Subtarget.enableMachineScheduler())
16334     return TargetLowering::getSchedulingPreference(N);
16335 
16336   return Sched::ILP;
16337 }
16338 
16339 // Create a fast isel object.
16340 FastISel *
16341 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
16342                                   const TargetLibraryInfo *LibInfo) const {
16343   return PPC::createFastISel(FuncInfo, LibInfo);
16344 }
16345 
16346 // 'Inverted' means the FMA opcode after negating one multiplicand.
16347 // For example, (fma -a b c) = (fnmsub a b c)
16348 static unsigned invertFMAOpcode(unsigned Opc) {
16349   switch (Opc) {
16350   default:
16351     llvm_unreachable("Invalid FMA opcode for PowerPC!");
16352   case ISD::FMA:
16353     return PPCISD::FNMSUB;
16354   case PPCISD::FNMSUB:
16355     return ISD::FMA;
16356   }
16357 }
16358 
16359 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
16360                                                 bool LegalOps, bool OptForSize,
16361                                                 NegatibleCost &Cost,
16362                                                 unsigned Depth) const {
16363   if (Depth > SelectionDAG::MaxRecursionDepth)
16364     return SDValue();
16365 
16366   unsigned Opc = Op.getOpcode();
16367   EVT VT = Op.getValueType();
16368   SDNodeFlags Flags = Op.getNode()->getFlags();
16369 
16370   switch (Opc) {
16371   case PPCISD::FNMSUB:
16372     if (!Op.hasOneUse() || !isTypeLegal(VT))
16373       break;
16374 
16375     const TargetOptions &Options = getTargetMachine().Options;
16376     SDValue N0 = Op.getOperand(0);
16377     SDValue N1 = Op.getOperand(1);
16378     SDValue N2 = Op.getOperand(2);
16379     SDLoc Loc(Op);
16380 
16381     NegatibleCost N2Cost = NegatibleCost::Expensive;
16382     SDValue NegN2 =
16383         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16384 
16385     if (!NegN2)
16386       return SDValue();
16387 
16388     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16389     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16390     // These transformations may change sign of zeroes. For example,
16391     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16392     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16393       // Try and choose the cheaper one to negate.
16394       NegatibleCost N0Cost = NegatibleCost::Expensive;
16395       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16396                                            N0Cost, Depth + 1);
16397 
16398       NegatibleCost N1Cost = NegatibleCost::Expensive;
16399       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16400                                            N1Cost, Depth + 1);
16401 
16402       if (NegN0 && N0Cost <= N1Cost) {
16403         Cost = std::min(N0Cost, N2Cost);
16404         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16405       } else if (NegN1) {
16406         Cost = std::min(N1Cost, N2Cost);
16407         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16408       }
16409     }
16410 
16411     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16412     if (isOperationLegal(ISD::FMA, VT)) {
16413       Cost = N2Cost;
16414       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16415     }
16416 
16417     break;
16418   }
16419 
16420   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16421                                               Cost, Depth);
16422 }
16423 
16424 // Override to enable LOAD_STACK_GUARD lowering on Linux.
16425 bool PPCTargetLowering::useLoadStackGuardNode() const {
16426   if (!Subtarget.isTargetLinux())
16427     return TargetLowering::useLoadStackGuardNode();
16428   return true;
16429 }
16430 
16431 // Override to disable global variable loading on Linux.
16432 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
16433   if (!Subtarget.isTargetLinux())
16434     return TargetLowering::insertSSPDeclarations(M);
16435 }
16436 
16437 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
16438                                      bool ForCodeSize) const {
16439   if (!VT.isSimple() || !Subtarget.hasVSX())
16440     return false;
16441 
16442   switch(VT.getSimpleVT().SimpleTy) {
16443   default:
16444     // For FP types that are currently not supported by PPC backend, return
16445     // false. Examples: f16, f80.
16446     return false;
16447   case MVT::f32:
16448   case MVT::f64:
16449     if (Subtarget.hasPrefixInstrs()) {
16450       // With prefixed instructions, we can materialize anything that can be
16451       // represented with a 32-bit immediate, not just positive zero.
16452       APFloat APFloatOfImm = Imm;
16453       return convertToNonDenormSingle(APFloatOfImm);
16454     }
16455     LLVM_FALLTHROUGH;
16456   case MVT::ppcf128:
16457     return Imm.isPosZero();
16458   }
16459 }
16460 
16461 // For vector shift operation op, fold
16462 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16463 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
16464                                   SelectionDAG &DAG) {
16465   SDValue N0 = N->getOperand(0);
16466   SDValue N1 = N->getOperand(1);
16467   EVT VT = N0.getValueType();
16468   unsigned OpSizeInBits = VT.getScalarSizeInBits();
16469   unsigned Opcode = N->getOpcode();
16470   unsigned TargetOpcode;
16471 
16472   switch (Opcode) {
16473   default:
16474     llvm_unreachable("Unexpected shift operation");
16475   case ISD::SHL:
16476     TargetOpcode = PPCISD::SHL;
16477     break;
16478   case ISD::SRL:
16479     TargetOpcode = PPCISD::SRL;
16480     break;
16481   case ISD::SRA:
16482     TargetOpcode = PPCISD::SRA;
16483     break;
16484   }
16485 
16486   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16487       N1->getOpcode() == ISD::AND)
16488     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
16489       if (Mask->getZExtValue() == OpSizeInBits - 1)
16490         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16491 
16492   return SDValue();
16493 }
16494 
16495 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16496   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16497     return Value;
16498 
16499   SDValue N0 = N->getOperand(0);
16500   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16501   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16502       N0.getOpcode() != ISD::SIGN_EXTEND ||
16503       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16504       N->getValueType(0) != MVT::i64)
16505     return SDValue();
16506 
16507   // We can't save an operation here if the value is already extended, and
16508   // the existing shift is easier to combine.
16509   SDValue ExtsSrc = N0.getOperand(0);
16510   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16511       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16512     return SDValue();
16513 
16514   SDLoc DL(N0);
16515   SDValue ShiftBy = SDValue(CN1, 0);
16516   // We want the shift amount to be i32 on the extswli, but the shift could
16517   // have an i64.
16518   if (ShiftBy.getValueType() == MVT::i64)
16519     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16520 
16521   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16522                          ShiftBy);
16523 }
16524 
16525 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16526   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16527     return Value;
16528 
16529   return SDValue();
16530 }
16531 
16532 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16533   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16534     return Value;
16535 
16536   return SDValue();
16537 }
16538 
16539 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16540 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16541 // When C is zero, the equation (addi Z, -C) can be simplified to Z
16542 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16543 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16544                                  const PPCSubtarget &Subtarget) {
16545   if (!Subtarget.isPPC64())
16546     return SDValue();
16547 
16548   SDValue LHS = N->getOperand(0);
16549   SDValue RHS = N->getOperand(1);
16550 
16551   auto isZextOfCompareWithConstant = [](SDValue Op) {
16552     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16553         Op.getValueType() != MVT::i64)
16554       return false;
16555 
16556     SDValue Cmp = Op.getOperand(0);
16557     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16558         Cmp.getOperand(0).getValueType() != MVT::i64)
16559       return false;
16560 
16561     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16562       int64_t NegConstant = 0 - Constant->getSExtValue();
16563       // Due to the limitations of the addi instruction,
16564       // -C is required to be [-32768, 32767].
16565       return isInt<16>(NegConstant);
16566     }
16567 
16568     return false;
16569   };
16570 
16571   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16572   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16573 
16574   // If there is a pattern, canonicalize a zext operand to the RHS.
16575   if (LHSHasPattern && !RHSHasPattern)
16576     std::swap(LHS, RHS);
16577   else if (!LHSHasPattern && !RHSHasPattern)
16578     return SDValue();
16579 
16580   SDLoc DL(N);
16581   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16582   SDValue Cmp = RHS.getOperand(0);
16583   SDValue Z = Cmp.getOperand(0);
16584   auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16585 
16586   assert(Constant && "Constant Should not be a null pointer.");
16587   int64_t NegConstant = 0 - Constant->getSExtValue();
16588 
16589   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16590   default: break;
16591   case ISD::SETNE: {
16592     //                                 when C == 0
16593     //                             --> addze X, (addic Z, -1).carry
16594     //                            /
16595     // add X, (zext(setne Z, C))--
16596     //                            \    when -32768 <= -C <= 32767 && C != 0
16597     //                             --> addze X, (addic (addi Z, -C), -1).carry
16598     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16599                               DAG.getConstant(NegConstant, DL, MVT::i64));
16600     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16601     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16602                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16603     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16604                        SDValue(Addc.getNode(), 1));
16605     }
16606   case ISD::SETEQ: {
16607     //                                 when C == 0
16608     //                             --> addze X, (subfic Z, 0).carry
16609     //                            /
16610     // add X, (zext(sete  Z, C))--
16611     //                            \    when -32768 <= -C <= 32767 && C != 0
16612     //                             --> addze X, (subfic (addi Z, -C), 0).carry
16613     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16614                               DAG.getConstant(NegConstant, DL, MVT::i64));
16615     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16616     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16617                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16618     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16619                        SDValue(Subc.getNode(), 1));
16620     }
16621   }
16622 
16623   return SDValue();
16624 }
16625 
16626 // Transform
16627 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16628 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16629 // In this case both C1 and C2 must be known constants.
16630 // C1+C2 must fit into a 34 bit signed integer.
16631 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16632                                           const PPCSubtarget &Subtarget) {
16633   if (!Subtarget.isUsingPCRelativeCalls())
16634     return SDValue();
16635 
16636   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16637   // If we find that node try to cast the Global Address and the Constant.
16638   SDValue LHS = N->getOperand(0);
16639   SDValue RHS = N->getOperand(1);
16640 
16641   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16642     std::swap(LHS, RHS);
16643 
16644   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16645     return SDValue();
16646 
16647   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16648   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16649   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16650 
16651   // Check that both casts succeeded.
16652   if (!GSDN || !ConstNode)
16653     return SDValue();
16654 
16655   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16656   SDLoc DL(GSDN);
16657 
16658   // The signed int offset needs to fit in 34 bits.
16659   if (!isInt<34>(NewOffset))
16660     return SDValue();
16661 
16662   // The new global address is a copy of the old global address except
16663   // that it has the updated Offset.
16664   SDValue GA =
16665       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16666                                  NewOffset, GSDN->getTargetFlags());
16667   SDValue MatPCRel =
16668       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16669   return MatPCRel;
16670 }
16671 
16672 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16673   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16674     return Value;
16675 
16676   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16677     return Value;
16678 
16679   return SDValue();
16680 }
16681 
16682 // Detect TRUNCATE operations on bitcasts of float128 values.
16683 // What we are looking for here is the situtation where we extract a subset
16684 // of bits from a 128 bit float.
16685 // This can be of two forms:
16686 // 1) BITCAST of f128 feeding TRUNCATE
16687 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16688 // The reason this is required is because we do not have a legal i128 type
16689 // and so we want to prevent having to store the f128 and then reload part
16690 // of it.
16691 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16692                                            DAGCombinerInfo &DCI) const {
16693   // If we are using CRBits then try that first.
16694   if (Subtarget.useCRBits()) {
16695     // Check if CRBits did anything and return that if it did.
16696     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16697       return CRTruncValue;
16698   }
16699 
16700   SDLoc dl(N);
16701   SDValue Op0 = N->getOperand(0);
16702 
16703   // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16704   if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16705     EVT VT = N->getValueType(0);
16706     if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16707       return SDValue();
16708     SDValue Sub = Op0.getOperand(0);
16709     if (Sub.getOpcode() == ISD::SUB) {
16710       SDValue SubOp0 = Sub.getOperand(0);
16711       SDValue SubOp1 = Sub.getOperand(1);
16712       if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16713           (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16714         return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16715                                SubOp1.getOperand(0),
16716                                DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16717       }
16718     }
16719   }
16720 
16721   // Looking for a truncate of i128 to i64.
16722   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16723     return SDValue();
16724 
16725   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16726 
16727   // SRL feeding TRUNCATE.
16728   if (Op0.getOpcode() == ISD::SRL) {
16729     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16730     // The right shift has to be by 64 bits.
16731     if (!ConstNode || ConstNode->getZExtValue() != 64)
16732       return SDValue();
16733 
16734     // Switch the element number to extract.
16735     EltToExtract = EltToExtract ? 0 : 1;
16736     // Update Op0 past the SRL.
16737     Op0 = Op0.getOperand(0);
16738   }
16739 
16740   // BITCAST feeding a TRUNCATE possibly via SRL.
16741   if (Op0.getOpcode() == ISD::BITCAST &&
16742       Op0.getValueType() == MVT::i128 &&
16743       Op0.getOperand(0).getValueType() == MVT::f128) {
16744     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16745     return DCI.DAG.getNode(
16746         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16747         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16748   }
16749   return SDValue();
16750 }
16751 
16752 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16753   SelectionDAG &DAG = DCI.DAG;
16754 
16755   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16756   if (!ConstOpOrElement)
16757     return SDValue();
16758 
16759   // An imul is usually smaller than the alternative sequence for legal type.
16760   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16761       isOperationLegal(ISD::MUL, N->getValueType(0)))
16762     return SDValue();
16763 
16764   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16765     switch (this->Subtarget.getCPUDirective()) {
16766     default:
16767       // TODO: enhance the condition for subtarget before pwr8
16768       return false;
16769     case PPC::DIR_PWR8:
16770       //  type        mul     add    shl
16771       // scalar        4       1      1
16772       // vector        7       2      2
16773       return true;
16774     case PPC::DIR_PWR9:
16775     case PPC::DIR_PWR10:
16776     case PPC::DIR_PWR_FUTURE:
16777       //  type        mul     add    shl
16778       // scalar        5       2      2
16779       // vector        7       2      2
16780 
16781       // The cycle RATIO of related operations are showed as a table above.
16782       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
16783       // scalar and vector type. For 2 instrs patterns, add/sub + shl
16784       // are 4, it is always profitable; but for 3 instrs patterns
16785       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
16786       // So we should only do it for vector type.
16787       return IsAddOne && IsNeg ? VT.isVector() : true;
16788     }
16789   };
16790 
16791   EVT VT = N->getValueType(0);
16792   SDLoc DL(N);
16793 
16794   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16795   bool IsNeg = MulAmt.isNegative();
16796   APInt MulAmtAbs = MulAmt.abs();
16797 
16798   if ((MulAmtAbs - 1).isPowerOf2()) {
16799     // (mul x, 2^N + 1) => (add (shl x, N), x)
16800     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16801 
16802     if (!IsProfitable(IsNeg, true, VT))
16803       return SDValue();
16804 
16805     SDValue Op0 = N->getOperand(0);
16806     SDValue Op1 =
16807         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16808                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16809     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16810 
16811     if (!IsNeg)
16812       return Res;
16813 
16814     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16815   } else if ((MulAmtAbs + 1).isPowerOf2()) {
16816     // (mul x, 2^N - 1) => (sub (shl x, N), x)
16817     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16818 
16819     if (!IsProfitable(IsNeg, false, VT))
16820       return SDValue();
16821 
16822     SDValue Op0 = N->getOperand(0);
16823     SDValue Op1 =
16824         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16825                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16826 
16827     if (!IsNeg)
16828       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16829     else
16830       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16831 
16832   } else {
16833     return SDValue();
16834   }
16835 }
16836 
16837 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16838 // in combiner since we need to check SD flags and other subtarget features.
16839 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16840                                           DAGCombinerInfo &DCI) const {
16841   SDValue N0 = N->getOperand(0);
16842   SDValue N1 = N->getOperand(1);
16843   SDValue N2 = N->getOperand(2);
16844   SDNodeFlags Flags = N->getFlags();
16845   EVT VT = N->getValueType(0);
16846   SelectionDAG &DAG = DCI.DAG;
16847   const TargetOptions &Options = getTargetMachine().Options;
16848   unsigned Opc = N->getOpcode();
16849   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16850   bool LegalOps = !DCI.isBeforeLegalizeOps();
16851   SDLoc Loc(N);
16852 
16853   if (!isOperationLegal(ISD::FMA, VT))
16854     return SDValue();
16855 
16856   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16857   // since (fnmsub a b c)=-0 while c-ab=+0.
16858   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16859     return SDValue();
16860 
16861   // (fma (fneg a) b c) => (fnmsub a b c)
16862   // (fnmsub (fneg a) b c) => (fma a b c)
16863   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16864     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16865 
16866   // (fma a (fneg b) c) => (fnmsub a b c)
16867   // (fnmsub a (fneg b) c) => (fma a b c)
16868   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16869     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16870 
16871   return SDValue();
16872 }
16873 
16874 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16875   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16876   if (!Subtarget.is64BitELFABI())
16877     return false;
16878 
16879   // If not a tail call then no need to proceed.
16880   if (!CI->isTailCall())
16881     return false;
16882 
16883   // If sibling calls have been disabled and tail-calls aren't guaranteed
16884   // there is no reason to duplicate.
16885   auto &TM = getTargetMachine();
16886   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16887     return false;
16888 
16889   // Can't tail call a function called indirectly, or if it has variadic args.
16890   const Function *Callee = CI->getCalledFunction();
16891   if (!Callee || Callee->isVarArg())
16892     return false;
16893 
16894   // Make sure the callee and caller calling conventions are eligible for tco.
16895   const Function *Caller = CI->getParent()->getParent();
16896   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16897                                            CI->getCallingConv()))
16898       return false;
16899 
16900   // If the function is local then we have a good chance at tail-calling it
16901   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16902 }
16903 
16904 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16905   if (!Subtarget.hasVSX())
16906     return false;
16907   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16908     return true;
16909   return VT == MVT::f32 || VT == MVT::f64 ||
16910     VT == MVT::v4f32 || VT == MVT::v2f64;
16911 }
16912 
16913 bool PPCTargetLowering::
16914 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16915   const Value *Mask = AndI.getOperand(1);
16916   // If the mask is suitable for andi. or andis. we should sink the and.
16917   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16918     // Can't handle constants wider than 64-bits.
16919     if (CI->getBitWidth() > 64)
16920       return false;
16921     int64_t ConstVal = CI->getZExtValue();
16922     return isUInt<16>(ConstVal) ||
16923       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16924   }
16925 
16926   // For non-constant masks, we can always use the record-form and.
16927   return true;
16928 }
16929 
16930 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16931 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16932 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16933 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16934 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16935 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16936   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16937   assert(Subtarget.hasP9Altivec() &&
16938          "Only combine this when P9 altivec supported!");
16939   EVT VT = N->getValueType(0);
16940   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16941     return SDValue();
16942 
16943   SelectionDAG &DAG = DCI.DAG;
16944   SDLoc dl(N);
16945   if (N->getOperand(0).getOpcode() == ISD::SUB) {
16946     // Even for signed integers, if it's known to be positive (as signed
16947     // integer) due to zero-extended inputs.
16948     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16949     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16950     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16951          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16952         (SubOpcd1 == ISD::ZERO_EXTEND ||
16953          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16954       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16955                          N->getOperand(0)->getOperand(0),
16956                          N->getOperand(0)->getOperand(1),
16957                          DAG.getTargetConstant(0, dl, MVT::i32));
16958     }
16959 
16960     // For type v4i32, it can be optimized with xvnegsp + vabsduw
16961     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16962         N->getOperand(0).hasOneUse()) {
16963       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16964                          N->getOperand(0)->getOperand(0),
16965                          N->getOperand(0)->getOperand(1),
16966                          DAG.getTargetConstant(1, dl, MVT::i32));
16967     }
16968   }
16969 
16970   return SDValue();
16971 }
16972 
16973 // For type v4i32/v8ii16/v16i8, transform
16974 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16975 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16976 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16977 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16978 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16979                                           DAGCombinerInfo &DCI) const {
16980   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16981   assert(Subtarget.hasP9Altivec() &&
16982          "Only combine this when P9 altivec supported!");
16983 
16984   SelectionDAG &DAG = DCI.DAG;
16985   SDLoc dl(N);
16986   SDValue Cond = N->getOperand(0);
16987   SDValue TrueOpnd = N->getOperand(1);
16988   SDValue FalseOpnd = N->getOperand(2);
16989   EVT VT = N->getOperand(1).getValueType();
16990 
16991   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16992       FalseOpnd.getOpcode() != ISD::SUB)
16993     return SDValue();
16994 
16995   // ABSD only available for type v4i32/v8i16/v16i8
16996   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16997     return SDValue();
16998 
16999   // At least to save one more dependent computation
17000   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
17001     return SDValue();
17002 
17003   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17004 
17005   // Can only handle unsigned comparison here
17006   switch (CC) {
17007   default:
17008     return SDValue();
17009   case ISD::SETUGT:
17010   case ISD::SETUGE:
17011     break;
17012   case ISD::SETULT:
17013   case ISD::SETULE:
17014     std::swap(TrueOpnd, FalseOpnd);
17015     break;
17016   }
17017 
17018   SDValue CmpOpnd1 = Cond.getOperand(0);
17019   SDValue CmpOpnd2 = Cond.getOperand(1);
17020 
17021   // SETCC CmpOpnd1 CmpOpnd2 cond
17022   // TrueOpnd = CmpOpnd1 - CmpOpnd2
17023   // FalseOpnd = CmpOpnd2 - CmpOpnd1
17024   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
17025       TrueOpnd.getOperand(1) == CmpOpnd2 &&
17026       FalseOpnd.getOperand(0) == CmpOpnd2 &&
17027       FalseOpnd.getOperand(1) == CmpOpnd1) {
17028     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
17029                        CmpOpnd1, CmpOpnd2,
17030                        DAG.getTargetConstant(0, dl, MVT::i32));
17031   }
17032 
17033   return SDValue();
17034 }
17035