1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
15 #include "SystemZConstantPoolValue.h"
16 #include "SystemZMachineFunctionInfo.h"
17 #include "SystemZTargetMachine.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/KnownBits.h"
26 #include <cctype>
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "systemz-lower"
31 
32 namespace {
33 // Represents information about a comparison.
34 struct Comparison {
35   Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
36     : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
37       Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
38 
39   // The operands to the comparison.
40   SDValue Op0, Op1;
41 
42   // Chain if this is a strict floating-point comparison.
43   SDValue Chain;
44 
45   // The opcode that should be used to compare Op0 and Op1.
46   unsigned Opcode;
47 
48   // A SystemZICMP value.  Only used for integer comparisons.
49   unsigned ICmpType;
50 
51   // The mask of CC values that Opcode can produce.
52   unsigned CCValid;
53 
54   // The mask of CC values for which the original condition is true.
55   unsigned CCMask;
56 };
57 } // end anonymous namespace
58 
59 // Classify VT as either 32 or 64 bit.
60 static bool is32Bit(EVT VT) {
61   switch (VT.getSimpleVT().SimpleTy) {
62   case MVT::i32:
63     return true;
64   case MVT::i64:
65     return false;
66   default:
67     llvm_unreachable("Unsupported type");
68   }
69 }
70 
71 // Return a version of MachineOperand that can be safely used before the
72 // final use.
73 static MachineOperand earlyUseOperand(MachineOperand Op) {
74   if (Op.isReg())
75     Op.setIsKill(false);
76   return Op;
77 }
78 
79 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
80                                              const SystemZSubtarget &STI)
81     : TargetLowering(TM), Subtarget(STI) {
82   MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
83 
84   // Set up the register classes.
85   if (Subtarget.hasHighWord())
86     addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
87   else
88     addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
89   addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
90   if (Subtarget.hasVector()) {
91     addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
92     addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
93   } else {
94     addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
95     addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
96   }
97   if (Subtarget.hasVectorEnhancements1())
98     addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
99   else
100     addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
101 
102   if (Subtarget.hasVector()) {
103     addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
104     addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
105     addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
106     addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
107     addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
108     addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
109   }
110 
111   // Compute derived properties from the register classes
112   computeRegisterProperties(Subtarget.getRegisterInfo());
113 
114   // Set up special registers.
115   setStackPointerRegisterToSaveRestore(SystemZ::R15D);
116 
117   // TODO: It may be better to default to latency-oriented scheduling, however
118   // LLVM's current latency-oriented scheduler can't handle physreg definitions
119   // such as SystemZ has with CC, so set this to the register-pressure
120   // scheduler, because it can.
121   setSchedulingPreference(Sched::RegPressure);
122 
123   setBooleanContents(ZeroOrOneBooleanContent);
124   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
125 
126   // Instructions are strings of 2-byte aligned 2-byte values.
127   setMinFunctionAlignment(Align(2));
128   // For performance reasons we prefer 16-byte alignment.
129   setPrefFunctionAlignment(Align(16));
130 
131   // Handle operations that are handled in a similar way for all types.
132   for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
133        I <= MVT::LAST_FP_VALUETYPE;
134        ++I) {
135     MVT VT = MVT::SimpleValueType(I);
136     if (isTypeLegal(VT)) {
137       // Lower SET_CC into an IPM-based sequence.
138       setOperationAction(ISD::SETCC, VT, Custom);
139       setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
140       setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
141 
142       // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
143       setOperationAction(ISD::SELECT, VT, Expand);
144 
145       // Lower SELECT_CC and BR_CC into separate comparisons and branches.
146       setOperationAction(ISD::SELECT_CC, VT, Custom);
147       setOperationAction(ISD::BR_CC,     VT, Custom);
148     }
149   }
150 
151   // Expand jump table branches as address arithmetic followed by an
152   // indirect jump.
153   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
154 
155   // Expand BRCOND into a BR_CC (see above).
156   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
157 
158   // Handle integer types.
159   for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
160        I <= MVT::LAST_INTEGER_VALUETYPE;
161        ++I) {
162     MVT VT = MVT::SimpleValueType(I);
163     if (isTypeLegal(VT)) {
164       // Expand individual DIV and REMs into DIVREMs.
165       setOperationAction(ISD::SDIV, VT, Expand);
166       setOperationAction(ISD::UDIV, VT, Expand);
167       setOperationAction(ISD::SREM, VT, Expand);
168       setOperationAction(ISD::UREM, VT, Expand);
169       setOperationAction(ISD::SDIVREM, VT, Custom);
170       setOperationAction(ISD::UDIVREM, VT, Custom);
171 
172       // Support addition/subtraction with overflow.
173       setOperationAction(ISD::SADDO, VT, Custom);
174       setOperationAction(ISD::SSUBO, VT, Custom);
175 
176       // Support addition/subtraction with carry.
177       setOperationAction(ISD::UADDO, VT, Custom);
178       setOperationAction(ISD::USUBO, VT, Custom);
179 
180       // Support carry in as value rather than glue.
181       setOperationAction(ISD::ADDCARRY, VT, Custom);
182       setOperationAction(ISD::SUBCARRY, VT, Custom);
183 
184       // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
185       // stores, putting a serialization instruction after the stores.
186       setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
187       setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
188 
189       // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
190       // available, or if the operand is constant.
191       setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
192 
193       // Use POPCNT on z196 and above.
194       if (Subtarget.hasPopulationCount())
195         setOperationAction(ISD::CTPOP, VT, Custom);
196       else
197         setOperationAction(ISD::CTPOP, VT, Expand);
198 
199       // No special instructions for these.
200       setOperationAction(ISD::CTTZ,            VT, Expand);
201       setOperationAction(ISD::ROTR,            VT, Expand);
202 
203       // Use *MUL_LOHI where possible instead of MULH*.
204       setOperationAction(ISD::MULHS, VT, Expand);
205       setOperationAction(ISD::MULHU, VT, Expand);
206       setOperationAction(ISD::SMUL_LOHI, VT, Custom);
207       setOperationAction(ISD::UMUL_LOHI, VT, Custom);
208 
209       // Only z196 and above have native support for conversions to unsigned.
210       // On z10, promoting to i64 doesn't generate an inexact condition for
211       // values that are outside the i32 range but in the i64 range, so use
212       // the default expansion.
213       if (!Subtarget.hasFPExtension())
214         setOperationAction(ISD::FP_TO_UINT, VT, Expand);
215 
216       // Mirror those settings for STRICT_FP_TO_[SU]INT.  Note that these all
217       // default to Expand, so need to be modified to Legal where appropriate.
218       setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
219       if (Subtarget.hasFPExtension())
220         setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
221     }
222   }
223 
224   // Type legalization will convert 8- and 16-bit atomic operations into
225   // forms that operate on i32s (but still keeping the original memory VT).
226   // Lower them into full i32 operations.
227   setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
228   setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
229   setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
230   setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
231   setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
232   setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
233   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
234   setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
235   setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
236   setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
237   setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
238 
239   // Even though i128 is not a legal type, we still need to custom lower
240   // the atomic operations in order to exploit SystemZ instructions.
241   setOperationAction(ISD::ATOMIC_LOAD,     MVT::i128, Custom);
242   setOperationAction(ISD::ATOMIC_STORE,    MVT::i128, Custom);
243 
244   // We can use the CC result of compare-and-swap to implement
245   // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
246   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
247   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
248   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
249 
250   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
251 
252   // Traps are legal, as we will convert them to "j .+2".
253   setOperationAction(ISD::TRAP, MVT::Other, Legal);
254 
255   // z10 has instructions for signed but not unsigned FP conversion.
256   // Handle unsigned 32-bit types as signed 64-bit types.
257   if (!Subtarget.hasFPExtension()) {
258     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
259     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
260   }
261 
262   // We have native support for a 64-bit CTLZ, via FLOGR.
263   setOperationAction(ISD::CTLZ, MVT::i32, Promote);
264   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
265   setOperationAction(ISD::CTLZ, MVT::i64, Legal);
266 
267   // On z15 we have native support for a 64-bit CTPOP.
268   if (Subtarget.hasMiscellaneousExtensions3()) {
269     setOperationAction(ISD::CTPOP, MVT::i32, Promote);
270     setOperationAction(ISD::CTPOP, MVT::i64, Legal);
271   }
272 
273   // Give LowerOperation the chance to replace 64-bit ORs with subregs.
274   setOperationAction(ISD::OR, MVT::i64, Custom);
275 
276   // FIXME: Can we support these natively?
277   setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
278   setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
279   setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
280 
281   // We have native instructions for i8, i16 and i32 extensions, but not i1.
282   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
283   for (MVT VT : MVT::integer_valuetypes()) {
284     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
285     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
286     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
287   }
288 
289   // Handle the various types of symbolic address.
290   setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
291   setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
292   setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
293   setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
294   setOperationAction(ISD::JumpTable,        PtrVT, Custom);
295 
296   // We need to handle dynamic allocations specially because of the
297   // 160-byte area at the bottom of the stack.
298   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
299   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
300 
301   // Use custom expanders so that we can force the function to use
302   // a frame pointer.
303   setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
304   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
305 
306   // Handle prefetches with PFD or PFDRL.
307   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
308 
309   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
310     // Assume by default that all vector operations need to be expanded.
311     for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
312       if (getOperationAction(Opcode, VT) == Legal)
313         setOperationAction(Opcode, VT, Expand);
314 
315     // Likewise all truncating stores and extending loads.
316     for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
317       setTruncStoreAction(VT, InnerVT, Expand);
318       setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
319       setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
320       setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
321     }
322 
323     if (isTypeLegal(VT)) {
324       // These operations are legal for anything that can be stored in a
325       // vector register, even if there is no native support for the format
326       // as such.  In particular, we can do these for v4f32 even though there
327       // are no specific instructions for that format.
328       setOperationAction(ISD::LOAD, VT, Legal);
329       setOperationAction(ISD::STORE, VT, Legal);
330       setOperationAction(ISD::VSELECT, VT, Legal);
331       setOperationAction(ISD::BITCAST, VT, Legal);
332       setOperationAction(ISD::UNDEF, VT, Legal);
333 
334       // Likewise, except that we need to replace the nodes with something
335       // more specific.
336       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
337       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
338     }
339   }
340 
341   // Handle integer vector types.
342   for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
343     if (isTypeLegal(VT)) {
344       // These operations have direct equivalents.
345       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
346       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
347       setOperationAction(ISD::ADD, VT, Legal);
348       setOperationAction(ISD::SUB, VT, Legal);
349       if (VT != MVT::v2i64)
350         setOperationAction(ISD::MUL, VT, Legal);
351       setOperationAction(ISD::AND, VT, Legal);
352       setOperationAction(ISD::OR, VT, Legal);
353       setOperationAction(ISD::XOR, VT, Legal);
354       if (Subtarget.hasVectorEnhancements1())
355         setOperationAction(ISD::CTPOP, VT, Legal);
356       else
357         setOperationAction(ISD::CTPOP, VT, Custom);
358       setOperationAction(ISD::CTTZ, VT, Legal);
359       setOperationAction(ISD::CTLZ, VT, Legal);
360 
361       // Convert a GPR scalar to a vector by inserting it into element 0.
362       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
363 
364       // Use a series of unpacks for extensions.
365       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
366       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
367 
368       // Detect shifts by a scalar amount and convert them into
369       // V*_BY_SCALAR.
370       setOperationAction(ISD::SHL, VT, Custom);
371       setOperationAction(ISD::SRA, VT, Custom);
372       setOperationAction(ISD::SRL, VT, Custom);
373 
374       // At present ROTL isn't matched by DAGCombiner.  ROTR should be
375       // converted into ROTL.
376       setOperationAction(ISD::ROTL, VT, Expand);
377       setOperationAction(ISD::ROTR, VT, Expand);
378 
379       // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
380       // and inverting the result as necessary.
381       setOperationAction(ISD::SETCC, VT, Custom);
382       setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
383       if (Subtarget.hasVectorEnhancements1())
384         setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
385     }
386   }
387 
388   if (Subtarget.hasVector()) {
389     // There should be no need to check for float types other than v2f64
390     // since <2 x f32> isn't a legal type.
391     setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
392     setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
393     setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
394     setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
395     setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
396     setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
397     setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
398     setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
399 
400     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
401     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
402     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
403     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
404   }
405 
406   if (Subtarget.hasVectorEnhancements2()) {
407     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
408     setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
409     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
410     setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
411     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
412     setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
413     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
414     setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
415 
416     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
417     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
418     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
419     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
420   }
421 
422   // Handle floating-point types.
423   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
424        I <= MVT::LAST_FP_VALUETYPE;
425        ++I) {
426     MVT VT = MVT::SimpleValueType(I);
427     if (isTypeLegal(VT)) {
428       // We can use FI for FRINT.
429       setOperationAction(ISD::FRINT, VT, Legal);
430 
431       // We can use the extended form of FI for other rounding operations.
432       if (Subtarget.hasFPExtension()) {
433         setOperationAction(ISD::FNEARBYINT, VT, Legal);
434         setOperationAction(ISD::FFLOOR, VT, Legal);
435         setOperationAction(ISD::FCEIL, VT, Legal);
436         setOperationAction(ISD::FTRUNC, VT, Legal);
437         setOperationAction(ISD::FROUND, VT, Legal);
438       }
439 
440       // No special instructions for these.
441       setOperationAction(ISD::FSIN, VT, Expand);
442       setOperationAction(ISD::FCOS, VT, Expand);
443       setOperationAction(ISD::FSINCOS, VT, Expand);
444       setOperationAction(ISD::FREM, VT, Expand);
445       setOperationAction(ISD::FPOW, VT, Expand);
446 
447       // Handle constrained floating-point operations.
448       setOperationAction(ISD::STRICT_FADD, VT, Legal);
449       setOperationAction(ISD::STRICT_FSUB, VT, Legal);
450       setOperationAction(ISD::STRICT_FMUL, VT, Legal);
451       setOperationAction(ISD::STRICT_FDIV, VT, Legal);
452       setOperationAction(ISD::STRICT_FMA, VT, Legal);
453       setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
454       setOperationAction(ISD::STRICT_FRINT, VT, Legal);
455       setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
456       setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
457       if (Subtarget.hasFPExtension()) {
458         setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
459         setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
460         setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
461         setOperationAction(ISD::STRICT_FROUND, VT, Legal);
462         setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
463       }
464     }
465   }
466 
467   // Handle floating-point vector types.
468   if (Subtarget.hasVector()) {
469     // Scalar-to-vector conversion is just a subreg.
470     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
471     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
472 
473     // Some insertions and extractions can be done directly but others
474     // need to go via integers.
475     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
476     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
477     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
478     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
479 
480     // These operations have direct equivalents.
481     setOperationAction(ISD::FADD, MVT::v2f64, Legal);
482     setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
483     setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
484     setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
485     setOperationAction(ISD::FMA, MVT::v2f64, Legal);
486     setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
487     setOperationAction(ISD::FABS, MVT::v2f64, Legal);
488     setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
489     setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
490     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
491     setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
492     setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
493     setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
494     setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
495 
496     // Handle constrained floating-point operations.
497     setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
498     setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
499     setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
500     setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
501     setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
502     setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
503     setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
504     setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
505     setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
506     setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
507     setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
508     setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
509   }
510 
511   // The vector enhancements facility 1 has instructions for these.
512   if (Subtarget.hasVectorEnhancements1()) {
513     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
514     setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
515     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
516     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
517     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
518     setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
519     setOperationAction(ISD::FABS, MVT::v4f32, Legal);
520     setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
521     setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
522     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
523     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
524     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
525     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
526     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
527 
528     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
529     setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
530     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
531     setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
532 
533     setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
534     setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
535     setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
536     setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
537 
538     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
539     setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
540     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
541     setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
542 
543     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
544     setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
545     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
546     setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
547 
548     setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
549     setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
550     setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
551     setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
552 
553     // Handle constrained floating-point operations.
554     setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
555     setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
556     setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
557     setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
558     setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
559     setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
560     setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
561     setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
562     setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
563     setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
564     setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
565     setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
566     for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
567                      MVT::v4f32, MVT::v2f64 }) {
568       setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
569       setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
570     }
571   }
572 
573   // We have fused multiply-addition for f32 and f64 but not f128.
574   setOperationAction(ISD::FMA, MVT::f32,  Legal);
575   setOperationAction(ISD::FMA, MVT::f64,  Legal);
576   if (Subtarget.hasVectorEnhancements1())
577     setOperationAction(ISD::FMA, MVT::f128, Legal);
578   else
579     setOperationAction(ISD::FMA, MVT::f128, Expand);
580 
581   // We don't have a copysign instruction on vector registers.
582   if (Subtarget.hasVectorEnhancements1())
583     setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
584 
585   // Needed so that we don't try to implement f128 constant loads using
586   // a load-and-extend of a f80 constant (in cases where the constant
587   // would fit in an f80).
588   for (MVT VT : MVT::fp_valuetypes())
589     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
590 
591   // We don't have extending load instruction on vector registers.
592   if (Subtarget.hasVectorEnhancements1()) {
593     setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
594     setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
595   }
596 
597   // Floating-point truncation and stores need to be done separately.
598   setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
599   setTruncStoreAction(MVT::f128, MVT::f32, Expand);
600   setTruncStoreAction(MVT::f128, MVT::f64, Expand);
601 
602   // We have 64-bit FPR<->GPR moves, but need special handling for
603   // 32-bit forms.
604   if (!Subtarget.hasVector()) {
605     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
606     setOperationAction(ISD::BITCAST, MVT::f32, Custom);
607   }
608 
609   // VASTART and VACOPY need to deal with the SystemZ-specific varargs
610   // structure, but VAEND is a no-op.
611   setOperationAction(ISD::VASTART, MVT::Other, Custom);
612   setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
613   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
614 
615   // Codes for which we want to perform some z-specific combinations.
616   setTargetDAGCombine(ISD::ZERO_EXTEND);
617   setTargetDAGCombine(ISD::SIGN_EXTEND);
618   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
619   setTargetDAGCombine(ISD::LOAD);
620   setTargetDAGCombine(ISD::STORE);
621   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
622   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
623   setTargetDAGCombine(ISD::FP_ROUND);
624   setTargetDAGCombine(ISD::FP_EXTEND);
625   setTargetDAGCombine(ISD::BSWAP);
626   setTargetDAGCombine(ISD::SDIV);
627   setTargetDAGCombine(ISD::UDIV);
628   setTargetDAGCombine(ISD::SREM);
629   setTargetDAGCombine(ISD::UREM);
630 
631   // Handle intrinsics.
632   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
633   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
634 
635   // We want to use MVC in preference to even a single load/store pair.
636   MaxStoresPerMemcpy = 0;
637   MaxStoresPerMemcpyOptSize = 0;
638 
639   // The main memset sequence is a byte store followed by an MVC.
640   // Two STC or MV..I stores win over that, but the kind of fused stores
641   // generated by target-independent code don't when the byte value is
642   // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
643   // than "STC;MVC".  Handle the choice in target-specific code instead.
644   MaxStoresPerMemset = 0;
645   MaxStoresPerMemsetOptSize = 0;
646 
647   // Default to having -disable-strictnode-mutation on
648   IsStrictFPEnabled = true;
649 }
650 
651 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
652                                               LLVMContext &, EVT VT) const {
653   if (!VT.isVector())
654     return MVT::i32;
655   return VT.changeVectorElementTypeToInteger();
656 }
657 
658 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
659     const MachineFunction &MF, EVT VT) const {
660   VT = VT.getScalarType();
661 
662   if (!VT.isSimple())
663     return false;
664 
665   switch (VT.getSimpleVT().SimpleTy) {
666   case MVT::f32:
667   case MVT::f64:
668     return true;
669   case MVT::f128:
670     return Subtarget.hasVectorEnhancements1();
671   default:
672     break;
673   }
674 
675   return false;
676 }
677 
678 // Return true if the constant can be generated with a vector instruction,
679 // such as VGM, VGMB or VREPI.
680 bool SystemZVectorConstantInfo::isVectorConstantLegal(
681     const SystemZSubtarget &Subtarget) {
682   const SystemZInstrInfo *TII =
683       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
684   if (!Subtarget.hasVector() ||
685       (isFP128 && !Subtarget.hasVectorEnhancements1()))
686     return false;
687 
688   // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
689   // preferred way of creating all-zero and all-one vectors so give it
690   // priority over other methods below.
691   unsigned Mask = 0;
692   unsigned I = 0;
693   for (; I < SystemZ::VectorBytes; ++I) {
694     uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
695     if (Byte == 0xff)
696       Mask |= 1ULL << I;
697     else if (Byte != 0)
698       break;
699   }
700   if (I == SystemZ::VectorBytes) {
701     Opcode = SystemZISD::BYTE_MASK;
702     OpVals.push_back(Mask);
703     VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
704     return true;
705   }
706 
707   if (SplatBitSize > 64)
708     return false;
709 
710   auto tryValue = [&](uint64_t Value) -> bool {
711     // Try VECTOR REPLICATE IMMEDIATE
712     int64_t SignedValue = SignExtend64(Value, SplatBitSize);
713     if (isInt<16>(SignedValue)) {
714       OpVals.push_back(((unsigned) SignedValue));
715       Opcode = SystemZISD::REPLICATE;
716       VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
717                                SystemZ::VectorBits / SplatBitSize);
718       return true;
719     }
720     // Try VECTOR GENERATE MASK
721     unsigned Start, End;
722     if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
723       // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
724       // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
725       // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
726       OpVals.push_back(Start - (64 - SplatBitSize));
727       OpVals.push_back(End - (64 - SplatBitSize));
728       Opcode = SystemZISD::ROTATE_MASK;
729       VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
730                                SystemZ::VectorBits / SplatBitSize);
731       return true;
732     }
733     return false;
734   };
735 
736   // First try assuming that any undefined bits above the highest set bit
737   // and below the lowest set bit are 1s.  This increases the likelihood of
738   // being able to use a sign-extended element value in VECTOR REPLICATE
739   // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
740   uint64_t SplatBitsZ = SplatBits.getZExtValue();
741   uint64_t SplatUndefZ = SplatUndef.getZExtValue();
742   uint64_t Lower =
743       (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
744   uint64_t Upper =
745       (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
746   if (tryValue(SplatBitsZ | Upper | Lower))
747     return true;
748 
749   // Now try assuming that any undefined bits between the first and
750   // last defined set bits are set.  This increases the chances of
751   // using a non-wraparound mask.
752   uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
753   return tryValue(SplatBitsZ | Middle);
754 }
755 
756 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
757   IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
758   isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
759 
760   // Find the smallest splat.
761   SplatBits = FPImm.bitcastToAPInt();
762   unsigned Width = SplatBits.getBitWidth();
763   while (Width > 8) {
764     unsigned HalfSize = Width / 2;
765     APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
766     APInt LowValue = SplatBits.trunc(HalfSize);
767 
768     // If the two halves do not match, stop here.
769     if (HighValue != LowValue || 8 > HalfSize)
770       break;
771 
772     SplatBits = HighValue;
773     Width = HalfSize;
774   }
775   SplatUndef = 0;
776   SplatBitSize = Width;
777 }
778 
779 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
780   assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
781   bool HasAnyUndefs;
782 
783   // Get IntBits by finding the 128 bit splat.
784   BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
785                        true);
786 
787   // Get SplatBits by finding the 8 bit or greater splat.
788   BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
789                        true);
790 }
791 
792 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
793                                          bool ForCodeSize) const {
794   // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
795   if (Imm.isZero() || Imm.isNegZero())
796     return true;
797 
798   return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
799 }
800 
801 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
802   // We can use CGFI or CLGFI.
803   return isInt<32>(Imm) || isUInt<32>(Imm);
804 }
805 
806 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
807   // We can use ALGFI or SLGFI.
808   return isUInt<32>(Imm) || isUInt<32>(-Imm);
809 }
810 
811 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
812     EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
813   // Unaligned accesses should never be slower than the expanded version.
814   // We check specifically for aligned accesses in the few cases where
815   // they are required.
816   if (Fast)
817     *Fast = true;
818   return true;
819 }
820 
821 // Information about the addressing mode for a memory access.
822 struct AddressingMode {
823   // True if a long displacement is supported.
824   bool LongDisplacement;
825 
826   // True if use of index register is supported.
827   bool IndexReg;
828 
829   AddressingMode(bool LongDispl, bool IdxReg) :
830     LongDisplacement(LongDispl), IndexReg(IdxReg) {}
831 };
832 
833 // Return the desired addressing mode for a Load which has only one use (in
834 // the same block) which is a Store.
835 static AddressingMode getLoadStoreAddrMode(bool HasVector,
836                                           Type *Ty) {
837   // With vector support a Load->Store combination may be combined to either
838   // an MVC or vector operations and it seems to work best to allow the
839   // vector addressing mode.
840   if (HasVector)
841     return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
842 
843   // Otherwise only the MVC case is special.
844   bool MVC = Ty->isIntegerTy(8);
845   return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
846 }
847 
848 // Return the addressing mode which seems most desirable given an LLVM
849 // Instruction pointer.
850 static AddressingMode
851 supportedAddressingMode(Instruction *I, bool HasVector) {
852   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
853     switch (II->getIntrinsicID()) {
854     default: break;
855     case Intrinsic::memset:
856     case Intrinsic::memmove:
857     case Intrinsic::memcpy:
858       return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
859     }
860   }
861 
862   if (isa<LoadInst>(I) && I->hasOneUse()) {
863     auto *SingleUser = cast<Instruction>(*I->user_begin());
864     if (SingleUser->getParent() == I->getParent()) {
865       if (isa<ICmpInst>(SingleUser)) {
866         if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
867           if (C->getBitWidth() <= 64 &&
868               (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
869             // Comparison of memory with 16 bit signed / unsigned immediate
870             return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
871       } else if (isa<StoreInst>(SingleUser))
872         // Load->Store
873         return getLoadStoreAddrMode(HasVector, I->getType());
874     }
875   } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
876     if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
877       if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
878         // Load->Store
879         return getLoadStoreAddrMode(HasVector, LoadI->getType());
880   }
881 
882   if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
883 
884     // * Use LDE instead of LE/LEY for z13 to avoid partial register
885     //   dependencies (LDE only supports small offsets).
886     // * Utilize the vector registers to hold floating point
887     //   values (vector load / store instructions only support small
888     //   offsets).
889 
890     Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
891                          I->getOperand(0)->getType());
892     bool IsFPAccess = MemAccessTy->isFloatingPointTy();
893     bool IsVectorAccess = MemAccessTy->isVectorTy();
894 
895     // A store of an extracted vector element will be combined into a VSTE type
896     // instruction.
897     if (!IsVectorAccess && isa<StoreInst>(I)) {
898       Value *DataOp = I->getOperand(0);
899       if (isa<ExtractElementInst>(DataOp))
900         IsVectorAccess = true;
901     }
902 
903     // A load which gets inserted into a vector element will be combined into a
904     // VLE type instruction.
905     if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
906       User *LoadUser = *I->user_begin();
907       if (isa<InsertElementInst>(LoadUser))
908         IsVectorAccess = true;
909     }
910 
911     if (IsFPAccess || IsVectorAccess)
912       return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
913   }
914 
915   return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
916 }
917 
918 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
919        const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
920   // Punt on globals for now, although they can be used in limited
921   // RELATIVE LONG cases.
922   if (AM.BaseGV)
923     return false;
924 
925   // Require a 20-bit signed offset.
926   if (!isInt<20>(AM.BaseOffs))
927     return false;
928 
929   AddressingMode SupportedAM(true, true);
930   if (I != nullptr)
931     SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
932 
933   if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
934     return false;
935 
936   if (!SupportedAM.IndexReg)
937     // No indexing allowed.
938     return AM.Scale == 0;
939   else
940     // Indexing is OK but no scale factor can be applied.
941     return AM.Scale == 0 || AM.Scale == 1;
942 }
943 
944 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
945   if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
946     return false;
947   unsigned FromBits = FromType->getPrimitiveSizeInBits();
948   unsigned ToBits = ToType->getPrimitiveSizeInBits();
949   return FromBits > ToBits;
950 }
951 
952 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
953   if (!FromVT.isInteger() || !ToVT.isInteger())
954     return false;
955   unsigned FromBits = FromVT.getSizeInBits();
956   unsigned ToBits = ToVT.getSizeInBits();
957   return FromBits > ToBits;
958 }
959 
960 //===----------------------------------------------------------------------===//
961 // Inline asm support
962 //===----------------------------------------------------------------------===//
963 
964 TargetLowering::ConstraintType
965 SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
966   if (Constraint.size() == 1) {
967     switch (Constraint[0]) {
968     case 'a': // Address register
969     case 'd': // Data register (equivalent to 'r')
970     case 'f': // Floating-point register
971     case 'h': // High-part register
972     case 'r': // General-purpose register
973     case 'v': // Vector register
974       return C_RegisterClass;
975 
976     case 'Q': // Memory with base and unsigned 12-bit displacement
977     case 'R': // Likewise, plus an index
978     case 'S': // Memory with base and signed 20-bit displacement
979     case 'T': // Likewise, plus an index
980     case 'm': // Equivalent to 'T'.
981       return C_Memory;
982 
983     case 'I': // Unsigned 8-bit constant
984     case 'J': // Unsigned 12-bit constant
985     case 'K': // Signed 16-bit constant
986     case 'L': // Signed 20-bit displacement (on all targets we support)
987     case 'M': // 0x7fffffff
988       return C_Immediate;
989 
990     default:
991       break;
992     }
993   }
994   return TargetLowering::getConstraintType(Constraint);
995 }
996 
997 TargetLowering::ConstraintWeight SystemZTargetLowering::
998 getSingleConstraintMatchWeight(AsmOperandInfo &info,
999                                const char *constraint) const {
1000   ConstraintWeight weight = CW_Invalid;
1001   Value *CallOperandVal = info.CallOperandVal;
1002   // If we don't have a value, we can't do a match,
1003   // but allow it at the lowest weight.
1004   if (!CallOperandVal)
1005     return CW_Default;
1006   Type *type = CallOperandVal->getType();
1007   // Look at the constraint type.
1008   switch (*constraint) {
1009   default:
1010     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1011     break;
1012 
1013   case 'a': // Address register
1014   case 'd': // Data register (equivalent to 'r')
1015   case 'h': // High-part register
1016   case 'r': // General-purpose register
1017     if (CallOperandVal->getType()->isIntegerTy())
1018       weight = CW_Register;
1019     break;
1020 
1021   case 'f': // Floating-point register
1022     if (type->isFloatingPointTy())
1023       weight = CW_Register;
1024     break;
1025 
1026   case 'v': // Vector register
1027     if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1028         Subtarget.hasVector())
1029       weight = CW_Register;
1030     break;
1031 
1032   case 'I': // Unsigned 8-bit constant
1033     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1034       if (isUInt<8>(C->getZExtValue()))
1035         weight = CW_Constant;
1036     break;
1037 
1038   case 'J': // Unsigned 12-bit constant
1039     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1040       if (isUInt<12>(C->getZExtValue()))
1041         weight = CW_Constant;
1042     break;
1043 
1044   case 'K': // Signed 16-bit constant
1045     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1046       if (isInt<16>(C->getSExtValue()))
1047         weight = CW_Constant;
1048     break;
1049 
1050   case 'L': // Signed 20-bit displacement (on all targets we support)
1051     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1052       if (isInt<20>(C->getSExtValue()))
1053         weight = CW_Constant;
1054     break;
1055 
1056   case 'M': // 0x7fffffff
1057     if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1058       if (C->getZExtValue() == 0x7fffffff)
1059         weight = CW_Constant;
1060     break;
1061   }
1062   return weight;
1063 }
1064 
1065 // Parse a "{tNNN}" register constraint for which the register type "t"
1066 // has already been verified.  MC is the class associated with "t" and
1067 // Map maps 0-based register numbers to LLVM register numbers.
1068 static std::pair<unsigned, const TargetRegisterClass *>
1069 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1070                     const unsigned *Map, unsigned Size) {
1071   assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1072   if (isdigit(Constraint[2])) {
1073     unsigned Index;
1074     bool Failed =
1075         Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1076     if (!Failed && Index < Size && Map[Index])
1077       return std::make_pair(Map[Index], RC);
1078   }
1079   return std::make_pair(0U, nullptr);
1080 }
1081 
1082 std::pair<unsigned, const TargetRegisterClass *>
1083 SystemZTargetLowering::getRegForInlineAsmConstraint(
1084     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1085   if (Constraint.size() == 1) {
1086     // GCC Constraint Letters
1087     switch (Constraint[0]) {
1088     default: break;
1089     case 'd': // Data register (equivalent to 'r')
1090     case 'r': // General-purpose register
1091       if (VT == MVT::i64)
1092         return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1093       else if (VT == MVT::i128)
1094         return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1095       return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1096 
1097     case 'a': // Address register
1098       if (VT == MVT::i64)
1099         return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1100       else if (VT == MVT::i128)
1101         return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1102       return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1103 
1104     case 'h': // High-part register (an LLVM extension)
1105       return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1106 
1107     case 'f': // Floating-point register
1108       if (VT == MVT::f64)
1109         return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1110       else if (VT == MVT::f128)
1111         return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1112       return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1113 
1114     case 'v': // Vector register
1115       if (Subtarget.hasVector()) {
1116         if (VT == MVT::f32)
1117           return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1118         if (VT == MVT::f64)
1119           return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1120         return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1121       }
1122       break;
1123     }
1124   }
1125   if (Constraint.size() > 0 && Constraint[0] == '{') {
1126     // We need to override the default register parsing for GPRs and FPRs
1127     // because the interpretation depends on VT.  The internal names of
1128     // the registers are also different from the external names
1129     // (F0D and F0S instead of F0, etc.).
1130     if (Constraint[1] == 'r') {
1131       if (VT == MVT::i32)
1132         return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1133                                    SystemZMC::GR32Regs, 16);
1134       if (VT == MVT::i128)
1135         return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1136                                    SystemZMC::GR128Regs, 16);
1137       return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1138                                  SystemZMC::GR64Regs, 16);
1139     }
1140     if (Constraint[1] == 'f') {
1141       if (VT == MVT::f32)
1142         return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1143                                    SystemZMC::FP32Regs, 16);
1144       if (VT == MVT::f128)
1145         return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1146                                    SystemZMC::FP128Regs, 16);
1147       return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1148                                  SystemZMC::FP64Regs, 16);
1149     }
1150     if (Constraint[1] == 'v') {
1151       if (VT == MVT::f32)
1152         return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1153                                    SystemZMC::VR32Regs, 32);
1154       if (VT == MVT::f64)
1155         return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1156                                    SystemZMC::VR64Regs, 32);
1157       return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1158                                  SystemZMC::VR128Regs, 32);
1159     }
1160   }
1161   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1162 }
1163 
1164 void SystemZTargetLowering::
1165 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1166                              std::vector<SDValue> &Ops,
1167                              SelectionDAG &DAG) const {
1168   // Only support length 1 constraints for now.
1169   if (Constraint.length() == 1) {
1170     switch (Constraint[0]) {
1171     case 'I': // Unsigned 8-bit constant
1172       if (auto *C = dyn_cast<ConstantSDNode>(Op))
1173         if (isUInt<8>(C->getZExtValue()))
1174           Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1175                                               Op.getValueType()));
1176       return;
1177 
1178     case 'J': // Unsigned 12-bit constant
1179       if (auto *C = dyn_cast<ConstantSDNode>(Op))
1180         if (isUInt<12>(C->getZExtValue()))
1181           Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1182                                               Op.getValueType()));
1183       return;
1184 
1185     case 'K': // Signed 16-bit constant
1186       if (auto *C = dyn_cast<ConstantSDNode>(Op))
1187         if (isInt<16>(C->getSExtValue()))
1188           Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1189                                               Op.getValueType()));
1190       return;
1191 
1192     case 'L': // Signed 20-bit displacement (on all targets we support)
1193       if (auto *C = dyn_cast<ConstantSDNode>(Op))
1194         if (isInt<20>(C->getSExtValue()))
1195           Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1196                                               Op.getValueType()));
1197       return;
1198 
1199     case 'M': // 0x7fffffff
1200       if (auto *C = dyn_cast<ConstantSDNode>(Op))
1201         if (C->getZExtValue() == 0x7fffffff)
1202           Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1203                                               Op.getValueType()));
1204       return;
1205     }
1206   }
1207   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1208 }
1209 
1210 //===----------------------------------------------------------------------===//
1211 // Calling conventions
1212 //===----------------------------------------------------------------------===//
1213 
1214 #include "SystemZGenCallingConv.inc"
1215 
1216 const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1217   CallingConv::ID) const {
1218   static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1219                                            SystemZ::R14D, 0 };
1220   return ScratchRegs;
1221 }
1222 
1223 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1224                                                      Type *ToType) const {
1225   return isTruncateFree(FromType, ToType);
1226 }
1227 
1228 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1229   return CI->isTailCall();
1230 }
1231 
1232 // We do not yet support 128-bit single-element vector types.  If the user
1233 // attempts to use such types as function argument or return type, prefer
1234 // to error out instead of emitting code violating the ABI.
1235 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1236   if (ArgVT.isVector() && !VT.isVector())
1237     report_fatal_error("Unsupported vector argument or return type");
1238 }
1239 
1240 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1241   for (unsigned i = 0; i < Ins.size(); ++i)
1242     VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1243 }
1244 
1245 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1246   for (unsigned i = 0; i < Outs.size(); ++i)
1247     VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1248 }
1249 
1250 // Value is a value that has been passed to us in the location described by VA
1251 // (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
1252 // any loads onto Chain.
1253 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1254                                    CCValAssign &VA, SDValue Chain,
1255                                    SDValue Value) {
1256   // If the argument has been promoted from a smaller type, insert an
1257   // assertion to capture this.
1258   if (VA.getLocInfo() == CCValAssign::SExt)
1259     Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1260                         DAG.getValueType(VA.getValVT()));
1261   else if (VA.getLocInfo() == CCValAssign::ZExt)
1262     Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1263                         DAG.getValueType(VA.getValVT()));
1264 
1265   if (VA.isExtInLoc())
1266     Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1267   else if (VA.getLocInfo() == CCValAssign::BCvt) {
1268     // If this is a short vector argument loaded from the stack,
1269     // extend from i64 to full vector size and then bitcast.
1270     assert(VA.getLocVT() == MVT::i64);
1271     assert(VA.getValVT().isVector());
1272     Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1273     Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1274   } else
1275     assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1276   return Value;
1277 }
1278 
1279 // Value is a value of type VA.getValVT() that we need to copy into
1280 // the location described by VA.  Return a copy of Value converted to
1281 // VA.getValVT().  The caller is responsible for handling indirect values.
1282 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1283                                    CCValAssign &VA, SDValue Value) {
1284   switch (VA.getLocInfo()) {
1285   case CCValAssign::SExt:
1286     return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1287   case CCValAssign::ZExt:
1288     return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1289   case CCValAssign::AExt:
1290     return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1291   case CCValAssign::BCvt:
1292     // If this is a short vector argument to be stored to the stack,
1293     // bitcast to v2i64 and then extract first element.
1294     assert(VA.getLocVT() == MVT::i64);
1295     assert(VA.getValVT().isVector());
1296     Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1297     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1298                        DAG.getConstant(0, DL, MVT::i32));
1299   case CCValAssign::Full:
1300     return Value;
1301   default:
1302     llvm_unreachable("Unhandled getLocInfo()");
1303   }
1304 }
1305 
1306 SDValue SystemZTargetLowering::LowerFormalArguments(
1307     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1308     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1309     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1310   MachineFunction &MF = DAG.getMachineFunction();
1311   MachineFrameInfo &MFI = MF.getFrameInfo();
1312   MachineRegisterInfo &MRI = MF.getRegInfo();
1313   SystemZMachineFunctionInfo *FuncInfo =
1314       MF.getInfo<SystemZMachineFunctionInfo>();
1315   auto *TFL =
1316       static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1317   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1318 
1319   // Detect unsupported vector argument types.
1320   if (Subtarget.hasVector())
1321     VerifyVectorTypes(Ins);
1322 
1323   // Assign locations to all of the incoming arguments.
1324   SmallVector<CCValAssign, 16> ArgLocs;
1325   SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1326   CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1327 
1328   unsigned NumFixedGPRs = 0;
1329   unsigned NumFixedFPRs = 0;
1330   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1331     SDValue ArgValue;
1332     CCValAssign &VA = ArgLocs[I];
1333     EVT LocVT = VA.getLocVT();
1334     if (VA.isRegLoc()) {
1335       // Arguments passed in registers
1336       const TargetRegisterClass *RC;
1337       switch (LocVT.getSimpleVT().SimpleTy) {
1338       default:
1339         // Integers smaller than i64 should be promoted to i64.
1340         llvm_unreachable("Unexpected argument type");
1341       case MVT::i32:
1342         NumFixedGPRs += 1;
1343         RC = &SystemZ::GR32BitRegClass;
1344         break;
1345       case MVT::i64:
1346         NumFixedGPRs += 1;
1347         RC = &SystemZ::GR64BitRegClass;
1348         break;
1349       case MVT::f32:
1350         NumFixedFPRs += 1;
1351         RC = &SystemZ::FP32BitRegClass;
1352         break;
1353       case MVT::f64:
1354         NumFixedFPRs += 1;
1355         RC = &SystemZ::FP64BitRegClass;
1356         break;
1357       case MVT::v16i8:
1358       case MVT::v8i16:
1359       case MVT::v4i32:
1360       case MVT::v2i64:
1361       case MVT::v4f32:
1362       case MVT::v2f64:
1363         RC = &SystemZ::VR128BitRegClass;
1364         break;
1365       }
1366 
1367       Register VReg = MRI.createVirtualRegister(RC);
1368       MRI.addLiveIn(VA.getLocReg(), VReg);
1369       ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1370     } else {
1371       assert(VA.isMemLoc() && "Argument not register or memory");
1372 
1373       // Create the frame index object for this incoming parameter.
1374       int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1375                                      VA.getLocMemOffset(), true);
1376 
1377       // Create the SelectionDAG nodes corresponding to a load
1378       // from this parameter.  Unpromoted ints and floats are
1379       // passed as right-justified 8-byte values.
1380       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1381       if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1382         FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1383                           DAG.getIntPtrConstant(4, DL));
1384       ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1385                              MachinePointerInfo::getFixedStack(MF, FI));
1386     }
1387 
1388     // Convert the value of the argument register into the value that's
1389     // being passed.
1390     if (VA.getLocInfo() == CCValAssign::Indirect) {
1391       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1392                                    MachinePointerInfo()));
1393       // If the original argument was split (e.g. i128), we need
1394       // to load all parts of it here (using the same address).
1395       unsigned ArgIndex = Ins[I].OrigArgIndex;
1396       assert (Ins[I].PartOffset == 0);
1397       while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1398         CCValAssign &PartVA = ArgLocs[I + 1];
1399         unsigned PartOffset = Ins[I + 1].PartOffset;
1400         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1401                                       DAG.getIntPtrConstant(PartOffset, DL));
1402         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1403                                      MachinePointerInfo()));
1404         ++I;
1405       }
1406     } else
1407       InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1408   }
1409 
1410   if (IsVarArg) {
1411     // Save the number of non-varargs registers for later use by va_start, etc.
1412     FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1413     FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1414 
1415     // Likewise the address (in the form of a frame index) of where the
1416     // first stack vararg would be.  The 1-byte size here is arbitrary.
1417     int64_t StackSize = CCInfo.getNextStackOffset();
1418     FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1419 
1420     // ...and a similar frame index for the caller-allocated save area
1421     // that will be used to store the incoming registers.
1422     int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
1423     unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1424     FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1425 
1426     // Store the FPR varargs in the reserved frame slots.  (We store the
1427     // GPRs as part of the prologue.)
1428     if (NumFixedFPRs < SystemZ::NumArgFPRs) {
1429       SDValue MemOps[SystemZ::NumArgFPRs];
1430       for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
1431         unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
1432         int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
1433         SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1434         unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
1435                                      &SystemZ::FP64BitRegClass);
1436         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1437         MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1438                                  MachinePointerInfo::getFixedStack(MF, FI));
1439       }
1440       // Join the stores, which are independent of one another.
1441       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1442                           makeArrayRef(&MemOps[NumFixedFPRs],
1443                                        SystemZ::NumArgFPRs-NumFixedFPRs));
1444     }
1445   }
1446 
1447   return Chain;
1448 }
1449 
1450 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1451                               SmallVectorImpl<CCValAssign> &ArgLocs,
1452                               SmallVectorImpl<ISD::OutputArg> &Outs) {
1453   // Punt if there are any indirect or stack arguments, or if the call
1454   // needs the callee-saved argument register R6, or if the call uses
1455   // the callee-saved register arguments SwiftSelf and SwiftError.
1456   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1457     CCValAssign &VA = ArgLocs[I];
1458     if (VA.getLocInfo() == CCValAssign::Indirect)
1459       return false;
1460     if (!VA.isRegLoc())
1461       return false;
1462     Register Reg = VA.getLocReg();
1463     if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1464       return false;
1465     if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1466       return false;
1467   }
1468   return true;
1469 }
1470 
1471 SDValue
1472 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1473                                  SmallVectorImpl<SDValue> &InVals) const {
1474   SelectionDAG &DAG = CLI.DAG;
1475   SDLoc &DL = CLI.DL;
1476   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1477   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1478   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1479   SDValue Chain = CLI.Chain;
1480   SDValue Callee = CLI.Callee;
1481   bool &IsTailCall = CLI.IsTailCall;
1482   CallingConv::ID CallConv = CLI.CallConv;
1483   bool IsVarArg = CLI.IsVarArg;
1484   MachineFunction &MF = DAG.getMachineFunction();
1485   EVT PtrVT = getPointerTy(MF.getDataLayout());
1486 
1487   // Detect unsupported vector argument and return types.
1488   if (Subtarget.hasVector()) {
1489     VerifyVectorTypes(Outs);
1490     VerifyVectorTypes(Ins);
1491   }
1492 
1493   // Analyze the operands of the call, assigning locations to each operand.
1494   SmallVector<CCValAssign, 16> ArgLocs;
1495   SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1496   ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1497 
1498   // We don't support GuaranteedTailCallOpt, only automatically-detected
1499   // sibling calls.
1500   if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1501     IsTailCall = false;
1502 
1503   // Get a count of how many bytes are to be pushed on the stack.
1504   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1505 
1506   // Mark the start of the call.
1507   if (!IsTailCall)
1508     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1509 
1510   // Copy argument values to their designated locations.
1511   SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1512   SmallVector<SDValue, 8> MemOpChains;
1513   SDValue StackPtr;
1514   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1515     CCValAssign &VA = ArgLocs[I];
1516     SDValue ArgValue = OutVals[I];
1517 
1518     if (VA.getLocInfo() == CCValAssign::Indirect) {
1519       // Store the argument in a stack slot and pass its address.
1520       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
1521       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1522       MemOpChains.push_back(
1523           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1524                        MachinePointerInfo::getFixedStack(MF, FI)));
1525       // If the original argument was split (e.g. i128), we need
1526       // to store all parts of it here (and pass just one address).
1527       unsigned ArgIndex = Outs[I].OrigArgIndex;
1528       assert (Outs[I].PartOffset == 0);
1529       while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1530         SDValue PartValue = OutVals[I + 1];
1531         unsigned PartOffset = Outs[I + 1].PartOffset;
1532         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1533                                       DAG.getIntPtrConstant(PartOffset, DL));
1534         MemOpChains.push_back(
1535             DAG.getStore(Chain, DL, PartValue, Address,
1536                          MachinePointerInfo::getFixedStack(MF, FI)));
1537         ++I;
1538       }
1539       ArgValue = SpillSlot;
1540     } else
1541       ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1542 
1543     if (VA.isRegLoc())
1544       // Queue up the argument copies and emit them at the end.
1545       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1546     else {
1547       assert(VA.isMemLoc() && "Argument not register or memory");
1548 
1549       // Work out the address of the stack slot.  Unpromoted ints and
1550       // floats are passed as right-justified 8-byte values.
1551       if (!StackPtr.getNode())
1552         StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1553       unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
1554       if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1555         Offset += 4;
1556       SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1557                                     DAG.getIntPtrConstant(Offset, DL));
1558 
1559       // Emit the store.
1560       MemOpChains.push_back(
1561           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1562     }
1563   }
1564 
1565   // Join the stores, which are independent of one another.
1566   if (!MemOpChains.empty())
1567     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1568 
1569   // Accept direct calls by converting symbolic call addresses to the
1570   // associated Target* opcodes.  Force %r1 to be used for indirect
1571   // tail calls.
1572   SDValue Glue;
1573   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1574     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1575     Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1576   } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1577     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1578     Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1579   } else if (IsTailCall) {
1580     Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1581     Glue = Chain.getValue(1);
1582     Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1583   }
1584 
1585   // Build a sequence of copy-to-reg nodes, chained and glued together.
1586   for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1587     Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1588                              RegsToPass[I].second, Glue);
1589     Glue = Chain.getValue(1);
1590   }
1591 
1592   // The first call operand is the chain and the second is the target address.
1593   SmallVector<SDValue, 8> Ops;
1594   Ops.push_back(Chain);
1595   Ops.push_back(Callee);
1596 
1597   // Add argument registers to the end of the list so that they are
1598   // known live into the call.
1599   for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1600     Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1601                                   RegsToPass[I].second.getValueType()));
1602 
1603   // Add a register mask operand representing the call-preserved registers.
1604   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1605   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1606   assert(Mask && "Missing call preserved mask for calling convention");
1607   Ops.push_back(DAG.getRegisterMask(Mask));
1608 
1609   // Glue the call to the argument copies, if any.
1610   if (Glue.getNode())
1611     Ops.push_back(Glue);
1612 
1613   // Emit the call.
1614   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1615   if (IsTailCall)
1616     return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1617   Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1618   Glue = Chain.getValue(1);
1619 
1620   // Mark the end of the call, which is glued to the call itself.
1621   Chain = DAG.getCALLSEQ_END(Chain,
1622                              DAG.getConstant(NumBytes, DL, PtrVT, true),
1623                              DAG.getConstant(0, DL, PtrVT, true),
1624                              Glue, DL);
1625   Glue = Chain.getValue(1);
1626 
1627   // Assign locations to each value returned by this call.
1628   SmallVector<CCValAssign, 16> RetLocs;
1629   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1630   RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1631 
1632   // Copy all of the result registers out of their specified physreg.
1633   for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1634     CCValAssign &VA = RetLocs[I];
1635 
1636     // Copy the value out, gluing the copy to the end of the call sequence.
1637     SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1638                                           VA.getLocVT(), Glue);
1639     Chain = RetValue.getValue(1);
1640     Glue = RetValue.getValue(2);
1641 
1642     // Convert the value of the return register into the value that's
1643     // being returned.
1644     InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1645   }
1646 
1647   return Chain;
1648 }
1649 
1650 bool SystemZTargetLowering::
1651 CanLowerReturn(CallingConv::ID CallConv,
1652                MachineFunction &MF, bool isVarArg,
1653                const SmallVectorImpl<ISD::OutputArg> &Outs,
1654                LLVMContext &Context) const {
1655   // Detect unsupported vector return types.
1656   if (Subtarget.hasVector())
1657     VerifyVectorTypes(Outs);
1658 
1659   // Special case that we cannot easily detect in RetCC_SystemZ since
1660   // i128 is not a legal type.
1661   for (auto &Out : Outs)
1662     if (Out.ArgVT == MVT::i128)
1663       return false;
1664 
1665   SmallVector<CCValAssign, 16> RetLocs;
1666   CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1667   return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1668 }
1669 
1670 SDValue
1671 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1672                                    bool IsVarArg,
1673                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
1674                                    const SmallVectorImpl<SDValue> &OutVals,
1675                                    const SDLoc &DL, SelectionDAG &DAG) const {
1676   MachineFunction &MF = DAG.getMachineFunction();
1677 
1678   // Detect unsupported vector return types.
1679   if (Subtarget.hasVector())
1680     VerifyVectorTypes(Outs);
1681 
1682   // Assign locations to each returned value.
1683   SmallVector<CCValAssign, 16> RetLocs;
1684   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1685   RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1686 
1687   // Quick exit for void returns
1688   if (RetLocs.empty())
1689     return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1690 
1691   if (CallConv == CallingConv::GHC)
1692     report_fatal_error("GHC functions return void only");
1693 
1694   // Copy the result values into the output registers.
1695   SDValue Glue;
1696   SmallVector<SDValue, 4> RetOps;
1697   RetOps.push_back(Chain);
1698   for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1699     CCValAssign &VA = RetLocs[I];
1700     SDValue RetValue = OutVals[I];
1701 
1702     // Make the return register live on exit.
1703     assert(VA.isRegLoc() && "Can only return in registers!");
1704 
1705     // Promote the value as required.
1706     RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1707 
1708     // Chain and glue the copies together.
1709     Register Reg = VA.getLocReg();
1710     Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1711     Glue = Chain.getValue(1);
1712     RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1713   }
1714 
1715   // Update chain and glue.
1716   RetOps[0] = Chain;
1717   if (Glue.getNode())
1718     RetOps.push_back(Glue);
1719 
1720   return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1721 }
1722 
1723 // Return true if Op is an intrinsic node with chain that returns the CC value
1724 // as its only (other) argument.  Provide the associated SystemZISD opcode and
1725 // the mask of valid CC values if so.
1726 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1727                                       unsigned &CCValid) {
1728   unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1729   switch (Id) {
1730   case Intrinsic::s390_tbegin:
1731     Opcode = SystemZISD::TBEGIN;
1732     CCValid = SystemZ::CCMASK_TBEGIN;
1733     return true;
1734 
1735   case Intrinsic::s390_tbegin_nofloat:
1736     Opcode = SystemZISD::TBEGIN_NOFLOAT;
1737     CCValid = SystemZ::CCMASK_TBEGIN;
1738     return true;
1739 
1740   case Intrinsic::s390_tend:
1741     Opcode = SystemZISD::TEND;
1742     CCValid = SystemZ::CCMASK_TEND;
1743     return true;
1744 
1745   default:
1746     return false;
1747   }
1748 }
1749 
1750 // Return true if Op is an intrinsic node without chain that returns the
1751 // CC value as its final argument.  Provide the associated SystemZISD
1752 // opcode and the mask of valid CC values if so.
1753 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1754   unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1755   switch (Id) {
1756   case Intrinsic::s390_vpkshs:
1757   case Intrinsic::s390_vpksfs:
1758   case Intrinsic::s390_vpksgs:
1759     Opcode = SystemZISD::PACKS_CC;
1760     CCValid = SystemZ::CCMASK_VCMP;
1761     return true;
1762 
1763   case Intrinsic::s390_vpklshs:
1764   case Intrinsic::s390_vpklsfs:
1765   case Intrinsic::s390_vpklsgs:
1766     Opcode = SystemZISD::PACKLS_CC;
1767     CCValid = SystemZ::CCMASK_VCMP;
1768     return true;
1769 
1770   case Intrinsic::s390_vceqbs:
1771   case Intrinsic::s390_vceqhs:
1772   case Intrinsic::s390_vceqfs:
1773   case Intrinsic::s390_vceqgs:
1774     Opcode = SystemZISD::VICMPES;
1775     CCValid = SystemZ::CCMASK_VCMP;
1776     return true;
1777 
1778   case Intrinsic::s390_vchbs:
1779   case Intrinsic::s390_vchhs:
1780   case Intrinsic::s390_vchfs:
1781   case Intrinsic::s390_vchgs:
1782     Opcode = SystemZISD::VICMPHS;
1783     CCValid = SystemZ::CCMASK_VCMP;
1784     return true;
1785 
1786   case Intrinsic::s390_vchlbs:
1787   case Intrinsic::s390_vchlhs:
1788   case Intrinsic::s390_vchlfs:
1789   case Intrinsic::s390_vchlgs:
1790     Opcode = SystemZISD::VICMPHLS;
1791     CCValid = SystemZ::CCMASK_VCMP;
1792     return true;
1793 
1794   case Intrinsic::s390_vtm:
1795     Opcode = SystemZISD::VTM;
1796     CCValid = SystemZ::CCMASK_VCMP;
1797     return true;
1798 
1799   case Intrinsic::s390_vfaebs:
1800   case Intrinsic::s390_vfaehs:
1801   case Intrinsic::s390_vfaefs:
1802     Opcode = SystemZISD::VFAE_CC;
1803     CCValid = SystemZ::CCMASK_ANY;
1804     return true;
1805 
1806   case Intrinsic::s390_vfaezbs:
1807   case Intrinsic::s390_vfaezhs:
1808   case Intrinsic::s390_vfaezfs:
1809     Opcode = SystemZISD::VFAEZ_CC;
1810     CCValid = SystemZ::CCMASK_ANY;
1811     return true;
1812 
1813   case Intrinsic::s390_vfeebs:
1814   case Intrinsic::s390_vfeehs:
1815   case Intrinsic::s390_vfeefs:
1816     Opcode = SystemZISD::VFEE_CC;
1817     CCValid = SystemZ::CCMASK_ANY;
1818     return true;
1819 
1820   case Intrinsic::s390_vfeezbs:
1821   case Intrinsic::s390_vfeezhs:
1822   case Intrinsic::s390_vfeezfs:
1823     Opcode = SystemZISD::VFEEZ_CC;
1824     CCValid = SystemZ::CCMASK_ANY;
1825     return true;
1826 
1827   case Intrinsic::s390_vfenebs:
1828   case Intrinsic::s390_vfenehs:
1829   case Intrinsic::s390_vfenefs:
1830     Opcode = SystemZISD::VFENE_CC;
1831     CCValid = SystemZ::CCMASK_ANY;
1832     return true;
1833 
1834   case Intrinsic::s390_vfenezbs:
1835   case Intrinsic::s390_vfenezhs:
1836   case Intrinsic::s390_vfenezfs:
1837     Opcode = SystemZISD::VFENEZ_CC;
1838     CCValid = SystemZ::CCMASK_ANY;
1839     return true;
1840 
1841   case Intrinsic::s390_vistrbs:
1842   case Intrinsic::s390_vistrhs:
1843   case Intrinsic::s390_vistrfs:
1844     Opcode = SystemZISD::VISTR_CC;
1845     CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
1846     return true;
1847 
1848   case Intrinsic::s390_vstrcbs:
1849   case Intrinsic::s390_vstrchs:
1850   case Intrinsic::s390_vstrcfs:
1851     Opcode = SystemZISD::VSTRC_CC;
1852     CCValid = SystemZ::CCMASK_ANY;
1853     return true;
1854 
1855   case Intrinsic::s390_vstrczbs:
1856   case Intrinsic::s390_vstrczhs:
1857   case Intrinsic::s390_vstrczfs:
1858     Opcode = SystemZISD::VSTRCZ_CC;
1859     CCValid = SystemZ::CCMASK_ANY;
1860     return true;
1861 
1862   case Intrinsic::s390_vstrsb:
1863   case Intrinsic::s390_vstrsh:
1864   case Intrinsic::s390_vstrsf:
1865     Opcode = SystemZISD::VSTRS_CC;
1866     CCValid = SystemZ::CCMASK_ANY;
1867     return true;
1868 
1869   case Intrinsic::s390_vstrszb:
1870   case Intrinsic::s390_vstrszh:
1871   case Intrinsic::s390_vstrszf:
1872     Opcode = SystemZISD::VSTRSZ_CC;
1873     CCValid = SystemZ::CCMASK_ANY;
1874     return true;
1875 
1876   case Intrinsic::s390_vfcedbs:
1877   case Intrinsic::s390_vfcesbs:
1878     Opcode = SystemZISD::VFCMPES;
1879     CCValid = SystemZ::CCMASK_VCMP;
1880     return true;
1881 
1882   case Intrinsic::s390_vfchdbs:
1883   case Intrinsic::s390_vfchsbs:
1884     Opcode = SystemZISD::VFCMPHS;
1885     CCValid = SystemZ::CCMASK_VCMP;
1886     return true;
1887 
1888   case Intrinsic::s390_vfchedbs:
1889   case Intrinsic::s390_vfchesbs:
1890     Opcode = SystemZISD::VFCMPHES;
1891     CCValid = SystemZ::CCMASK_VCMP;
1892     return true;
1893 
1894   case Intrinsic::s390_vftcidb:
1895   case Intrinsic::s390_vftcisb:
1896     Opcode = SystemZISD::VFTCI;
1897     CCValid = SystemZ::CCMASK_VCMP;
1898     return true;
1899 
1900   case Intrinsic::s390_tdc:
1901     Opcode = SystemZISD::TDC;
1902     CCValid = SystemZ::CCMASK_TDC;
1903     return true;
1904 
1905   default:
1906     return false;
1907   }
1908 }
1909 
1910 // Emit an intrinsic with chain and an explicit CC register result.
1911 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
1912                                            unsigned Opcode) {
1913   // Copy all operands except the intrinsic ID.
1914   unsigned NumOps = Op.getNumOperands();
1915   SmallVector<SDValue, 6> Ops;
1916   Ops.reserve(NumOps - 1);
1917   Ops.push_back(Op.getOperand(0));
1918   for (unsigned I = 2; I < NumOps; ++I)
1919     Ops.push_back(Op.getOperand(I));
1920 
1921   assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
1922   SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
1923   SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1924   SDValue OldChain = SDValue(Op.getNode(), 1);
1925   SDValue NewChain = SDValue(Intr.getNode(), 1);
1926   DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
1927   return Intr.getNode();
1928 }
1929 
1930 // Emit an intrinsic with an explicit CC register result.
1931 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
1932                                    unsigned Opcode) {
1933   // Copy all operands except the intrinsic ID.
1934   unsigned NumOps = Op.getNumOperands();
1935   SmallVector<SDValue, 6> Ops;
1936   Ops.reserve(NumOps - 1);
1937   for (unsigned I = 1; I < NumOps; ++I)
1938     Ops.push_back(Op.getOperand(I));
1939 
1940   SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
1941   return Intr.getNode();
1942 }
1943 
1944 // CC is a comparison that will be implemented using an integer or
1945 // floating-point comparison.  Return the condition code mask for
1946 // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
1947 // unsigned comparisons and clear for signed ones.  In the floating-point
1948 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
1949 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
1950 #define CONV(X) \
1951   case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
1952   case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
1953   case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
1954 
1955   switch (CC) {
1956   default:
1957     llvm_unreachable("Invalid integer condition!");
1958 
1959   CONV(EQ);
1960   CONV(NE);
1961   CONV(GT);
1962   CONV(GE);
1963   CONV(LT);
1964   CONV(LE);
1965 
1966   case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
1967   case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
1968   }
1969 #undef CONV
1970 }
1971 
1972 // If C can be converted to a comparison against zero, adjust the operands
1973 // as necessary.
1974 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
1975   if (C.ICmpType == SystemZICMP::UnsignedOnly)
1976     return;
1977 
1978   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
1979   if (!ConstOp1)
1980     return;
1981 
1982   int64_t Value = ConstOp1->getSExtValue();
1983   if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
1984       (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
1985       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
1986       (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
1987     C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
1988     C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
1989   }
1990 }
1991 
1992 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1993 // adjust the operands as necessary.
1994 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
1995                              Comparison &C) {
1996   // For us to make any changes, it must a comparison between a single-use
1997   // load and a constant.
1998   if (!C.Op0.hasOneUse() ||
1999       C.Op0.getOpcode() != ISD::LOAD ||
2000       C.Op1.getOpcode() != ISD::Constant)
2001     return;
2002 
2003   // We must have an 8- or 16-bit load.
2004   auto *Load = cast<LoadSDNode>(C.Op0);
2005   unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
2006   if (NumBits != 8 && NumBits != 16)
2007     return;
2008 
2009   // The load must be an extending one and the constant must be within the
2010   // range of the unextended value.
2011   auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2012   uint64_t Value = ConstOp1->getZExtValue();
2013   uint64_t Mask = (1 << NumBits) - 1;
2014   if (Load->getExtensionType() == ISD::SEXTLOAD) {
2015     // Make sure that ConstOp1 is in range of C.Op0.
2016     int64_t SignedValue = ConstOp1->getSExtValue();
2017     if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2018       return;
2019     if (C.ICmpType != SystemZICMP::SignedOnly) {
2020       // Unsigned comparison between two sign-extended values is equivalent
2021       // to unsigned comparison between two zero-extended values.
2022       Value &= Mask;
2023     } else if (NumBits == 8) {
2024       // Try to treat the comparison as unsigned, so that we can use CLI.
2025       // Adjust CCMask and Value as necessary.
2026       if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2027         // Test whether the high bit of the byte is set.
2028         Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2029       else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2030         // Test whether the high bit of the byte is clear.
2031         Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2032       else
2033         // No instruction exists for this combination.
2034         return;
2035       C.ICmpType = SystemZICMP::UnsignedOnly;
2036     }
2037   } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2038     if (Value > Mask)
2039       return;
2040     // If the constant is in range, we can use any comparison.
2041     C.ICmpType = SystemZICMP::Any;
2042   } else
2043     return;
2044 
2045   // Make sure that the first operand is an i32 of the right extension type.
2046   ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2047                               ISD::SEXTLOAD :
2048                               ISD::ZEXTLOAD);
2049   if (C.Op0.getValueType() != MVT::i32 ||
2050       Load->getExtensionType() != ExtType) {
2051     C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2052                            Load->getBasePtr(), Load->getPointerInfo(),
2053                            Load->getMemoryVT(), Load->getAlignment(),
2054                            Load->getMemOperand()->getFlags());
2055     // Update the chain uses.
2056     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2057   }
2058 
2059   // Make sure that the second operand is an i32 with the right value.
2060   if (C.Op1.getValueType() != MVT::i32 ||
2061       Value != ConstOp1->getZExtValue())
2062     C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2063 }
2064 
2065 // Return true if Op is either an unextended load, or a load suitable
2066 // for integer register-memory comparisons of type ICmpType.
2067 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2068   auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2069   if (Load) {
2070     // There are no instructions to compare a register with a memory byte.
2071     if (Load->getMemoryVT() == MVT::i8)
2072       return false;
2073     // Otherwise decide on extension type.
2074     switch (Load->getExtensionType()) {
2075     case ISD::NON_EXTLOAD:
2076       return true;
2077     case ISD::SEXTLOAD:
2078       return ICmpType != SystemZICMP::UnsignedOnly;
2079     case ISD::ZEXTLOAD:
2080       return ICmpType != SystemZICMP::SignedOnly;
2081     default:
2082       break;
2083     }
2084   }
2085   return false;
2086 }
2087 
2088 // Return true if it is better to swap the operands of C.
2089 static bool shouldSwapCmpOperands(const Comparison &C) {
2090   // Leave f128 comparisons alone, since they have no memory forms.
2091   if (C.Op0.getValueType() == MVT::f128)
2092     return false;
2093 
2094   // Always keep a floating-point constant second, since comparisons with
2095   // zero can use LOAD TEST and comparisons with other constants make a
2096   // natural memory operand.
2097   if (isa<ConstantFPSDNode>(C.Op1))
2098     return false;
2099 
2100   // Never swap comparisons with zero since there are many ways to optimize
2101   // those later.
2102   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2103   if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2104     return false;
2105 
2106   // Also keep natural memory operands second if the loaded value is
2107   // only used here.  Several comparisons have memory forms.
2108   if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2109     return false;
2110 
2111   // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2112   // In that case we generally prefer the memory to be second.
2113   if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2114     // The only exceptions are when the second operand is a constant and
2115     // we can use things like CHHSI.
2116     if (!ConstOp1)
2117       return true;
2118     // The unsigned memory-immediate instructions can handle 16-bit
2119     // unsigned integers.
2120     if (C.ICmpType != SystemZICMP::SignedOnly &&
2121         isUInt<16>(ConstOp1->getZExtValue()))
2122       return false;
2123     // The signed memory-immediate instructions can handle 16-bit
2124     // signed integers.
2125     if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2126         isInt<16>(ConstOp1->getSExtValue()))
2127       return false;
2128     return true;
2129   }
2130 
2131   // Try to promote the use of CGFR and CLGFR.
2132   unsigned Opcode0 = C.Op0.getOpcode();
2133   if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2134     return true;
2135   if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2136     return true;
2137   if (C.ICmpType != SystemZICMP::SignedOnly &&
2138       Opcode0 == ISD::AND &&
2139       C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2140       cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2141     return true;
2142 
2143   return false;
2144 }
2145 
2146 // Return a version of comparison CC mask CCMask in which the LT and GT
2147 // actions are swapped.
2148 static unsigned reverseCCMask(unsigned CCMask) {
2149   return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
2150           (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
2151           (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
2152           (CCMask & SystemZ::CCMASK_CMP_UO));
2153 }
2154 
2155 // Check whether C tests for equality between X and Y and whether X - Y
2156 // or Y - X is also computed.  In that case it's better to compare the
2157 // result of the subtraction against zero.
2158 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2159                                  Comparison &C) {
2160   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2161       C.CCMask == SystemZ::CCMASK_CMP_NE) {
2162     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2163       SDNode *N = *I;
2164       if (N->getOpcode() == ISD::SUB &&
2165           ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2166            (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2167         C.Op0 = SDValue(N, 0);
2168         C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2169         return;
2170       }
2171     }
2172   }
2173 }
2174 
2175 // Check whether C compares a floating-point value with zero and if that
2176 // floating-point value is also negated.  In this case we can use the
2177 // negation to set CC, so avoiding separate LOAD AND TEST and
2178 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2179 static void adjustForFNeg(Comparison &C) {
2180   // This optimization is invalid for strict comparisons, since FNEG
2181   // does not raise any exceptions.
2182   if (C.Chain)
2183     return;
2184   auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2185   if (C1 && C1->isZero()) {
2186     for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2187       SDNode *N = *I;
2188       if (N->getOpcode() == ISD::FNEG) {
2189         C.Op0 = SDValue(N, 0);
2190         C.CCMask = reverseCCMask(C.CCMask);
2191         return;
2192       }
2193     }
2194   }
2195 }
2196 
2197 // Check whether C compares (shl X, 32) with 0 and whether X is
2198 // also sign-extended.  In that case it is better to test the result
2199 // of the sign extension using LTGFR.
2200 //
2201 // This case is important because InstCombine transforms a comparison
2202 // with (sext (trunc X)) into a comparison with (shl X, 32).
2203 static void adjustForLTGFR(Comparison &C) {
2204   // Check for a comparison between (shl X, 32) and 0.
2205   if (C.Op0.getOpcode() == ISD::SHL &&
2206       C.Op0.getValueType() == MVT::i64 &&
2207       C.Op1.getOpcode() == ISD::Constant &&
2208       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2209     auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2210     if (C1 && C1->getZExtValue() == 32) {
2211       SDValue ShlOp0 = C.Op0.getOperand(0);
2212       // See whether X has any SIGN_EXTEND_INREG uses.
2213       for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2214         SDNode *N = *I;
2215         if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2216             cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2217           C.Op0 = SDValue(N, 0);
2218           return;
2219         }
2220       }
2221     }
2222   }
2223 }
2224 
2225 // If C compares the truncation of an extending load, try to compare
2226 // the untruncated value instead.  This exposes more opportunities to
2227 // reuse CC.
2228 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2229                                Comparison &C) {
2230   if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2231       C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2232       C.Op1.getOpcode() == ISD::Constant &&
2233       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2234     auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2235     if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
2236       unsigned Type = L->getExtensionType();
2237       if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2238           (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2239         C.Op0 = C.Op0.getOperand(0);
2240         C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2241       }
2242     }
2243   }
2244 }
2245 
2246 // Return true if shift operation N has an in-range constant shift value.
2247 // Store it in ShiftVal if so.
2248 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2249   auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2250   if (!Shift)
2251     return false;
2252 
2253   uint64_t Amount = Shift->getZExtValue();
2254   if (Amount >= N.getValueSizeInBits())
2255     return false;
2256 
2257   ShiftVal = Amount;
2258   return true;
2259 }
2260 
2261 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2262 // instruction and whether the CC value is descriptive enough to handle
2263 // a comparison of type Opcode between the AND result and CmpVal.
2264 // CCMask says which comparison result is being tested and BitSize is
2265 // the number of bits in the operands.  If TEST UNDER MASK can be used,
2266 // return the corresponding CC mask, otherwise return 0.
2267 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2268                                      uint64_t Mask, uint64_t CmpVal,
2269                                      unsigned ICmpType) {
2270   assert(Mask != 0 && "ANDs with zero should have been removed by now");
2271 
2272   // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2273   if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2274       !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2275     return 0;
2276 
2277   // Work out the masks for the lowest and highest bits.
2278   unsigned HighShift = 63 - countLeadingZeros(Mask);
2279   uint64_t High = uint64_t(1) << HighShift;
2280   uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2281 
2282   // Signed ordered comparisons are effectively unsigned if the sign
2283   // bit is dropped.
2284   bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2285 
2286   // Check for equality comparisons with 0, or the equivalent.
2287   if (CmpVal == 0) {
2288     if (CCMask == SystemZ::CCMASK_CMP_EQ)
2289       return SystemZ::CCMASK_TM_ALL_0;
2290     if (CCMask == SystemZ::CCMASK_CMP_NE)
2291       return SystemZ::CCMASK_TM_SOME_1;
2292   }
2293   if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2294     if (CCMask == SystemZ::CCMASK_CMP_LT)
2295       return SystemZ::CCMASK_TM_ALL_0;
2296     if (CCMask == SystemZ::CCMASK_CMP_GE)
2297       return SystemZ::CCMASK_TM_SOME_1;
2298   }
2299   if (EffectivelyUnsigned && CmpVal < Low) {
2300     if (CCMask == SystemZ::CCMASK_CMP_LE)
2301       return SystemZ::CCMASK_TM_ALL_0;
2302     if (CCMask == SystemZ::CCMASK_CMP_GT)
2303       return SystemZ::CCMASK_TM_SOME_1;
2304   }
2305 
2306   // Check for equality comparisons with the mask, or the equivalent.
2307   if (CmpVal == Mask) {
2308     if (CCMask == SystemZ::CCMASK_CMP_EQ)
2309       return SystemZ::CCMASK_TM_ALL_1;
2310     if (CCMask == SystemZ::CCMASK_CMP_NE)
2311       return SystemZ::CCMASK_TM_SOME_0;
2312   }
2313   if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2314     if (CCMask == SystemZ::CCMASK_CMP_GT)
2315       return SystemZ::CCMASK_TM_ALL_1;
2316     if (CCMask == SystemZ::CCMASK_CMP_LE)
2317       return SystemZ::CCMASK_TM_SOME_0;
2318   }
2319   if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2320     if (CCMask == SystemZ::CCMASK_CMP_GE)
2321       return SystemZ::CCMASK_TM_ALL_1;
2322     if (CCMask == SystemZ::CCMASK_CMP_LT)
2323       return SystemZ::CCMASK_TM_SOME_0;
2324   }
2325 
2326   // Check for ordered comparisons with the top bit.
2327   if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2328     if (CCMask == SystemZ::CCMASK_CMP_LE)
2329       return SystemZ::CCMASK_TM_MSB_0;
2330     if (CCMask == SystemZ::CCMASK_CMP_GT)
2331       return SystemZ::CCMASK_TM_MSB_1;
2332   }
2333   if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2334     if (CCMask == SystemZ::CCMASK_CMP_LT)
2335       return SystemZ::CCMASK_TM_MSB_0;
2336     if (CCMask == SystemZ::CCMASK_CMP_GE)
2337       return SystemZ::CCMASK_TM_MSB_1;
2338   }
2339 
2340   // If there are just two bits, we can do equality checks for Low and High
2341   // as well.
2342   if (Mask == Low + High) {
2343     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2344       return SystemZ::CCMASK_TM_MIXED_MSB_0;
2345     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2346       return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2347     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2348       return SystemZ::CCMASK_TM_MIXED_MSB_1;
2349     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2350       return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2351   }
2352 
2353   // Looks like we've exhausted our options.
2354   return 0;
2355 }
2356 
2357 // See whether C can be implemented as a TEST UNDER MASK instruction.
2358 // Update the arguments with the TM version if so.
2359 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2360                                    Comparison &C) {
2361   // Check that we have a comparison with a constant.
2362   auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2363   if (!ConstOp1)
2364     return;
2365   uint64_t CmpVal = ConstOp1->getZExtValue();
2366 
2367   // Check whether the nonconstant input is an AND with a constant mask.
2368   Comparison NewC(C);
2369   uint64_t MaskVal;
2370   ConstantSDNode *Mask = nullptr;
2371   if (C.Op0.getOpcode() == ISD::AND) {
2372     NewC.Op0 = C.Op0.getOperand(0);
2373     NewC.Op1 = C.Op0.getOperand(1);
2374     Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2375     if (!Mask)
2376       return;
2377     MaskVal = Mask->getZExtValue();
2378   } else {
2379     // There is no instruction to compare with a 64-bit immediate
2380     // so use TMHH instead if possible.  We need an unsigned ordered
2381     // comparison with an i64 immediate.
2382     if (NewC.Op0.getValueType() != MVT::i64 ||
2383         NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2384         NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2385         NewC.ICmpType == SystemZICMP::SignedOnly)
2386       return;
2387     // Convert LE and GT comparisons into LT and GE.
2388     if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2389         NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2390       if (CmpVal == uint64_t(-1))
2391         return;
2392       CmpVal += 1;
2393       NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2394     }
2395     // If the low N bits of Op1 are zero than the low N bits of Op0 can
2396     // be masked off without changing the result.
2397     MaskVal = -(CmpVal & -CmpVal);
2398     NewC.ICmpType = SystemZICMP::UnsignedOnly;
2399   }
2400   if (!MaskVal)
2401     return;
2402 
2403   // Check whether the combination of mask, comparison value and comparison
2404   // type are suitable.
2405   unsigned BitSize = NewC.Op0.getValueSizeInBits();
2406   unsigned NewCCMask, ShiftVal;
2407   if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2408       NewC.Op0.getOpcode() == ISD::SHL &&
2409       isSimpleShift(NewC.Op0, ShiftVal) &&
2410       (MaskVal >> ShiftVal != 0) &&
2411       ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2412       (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2413                                         MaskVal >> ShiftVal,
2414                                         CmpVal >> ShiftVal,
2415                                         SystemZICMP::Any))) {
2416     NewC.Op0 = NewC.Op0.getOperand(0);
2417     MaskVal >>= ShiftVal;
2418   } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2419              NewC.Op0.getOpcode() == ISD::SRL &&
2420              isSimpleShift(NewC.Op0, ShiftVal) &&
2421              (MaskVal << ShiftVal != 0) &&
2422              ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2423              (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2424                                                MaskVal << ShiftVal,
2425                                                CmpVal << ShiftVal,
2426                                                SystemZICMP::UnsignedOnly))) {
2427     NewC.Op0 = NewC.Op0.getOperand(0);
2428     MaskVal <<= ShiftVal;
2429   } else {
2430     NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2431                                      NewC.ICmpType);
2432     if (!NewCCMask)
2433       return;
2434   }
2435 
2436   // Go ahead and make the change.
2437   C.Opcode = SystemZISD::TM;
2438   C.Op0 = NewC.Op0;
2439   if (Mask && Mask->getZExtValue() == MaskVal)
2440     C.Op1 = SDValue(Mask, 0);
2441   else
2442     C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2443   C.CCValid = SystemZ::CCMASK_TM;
2444   C.CCMask = NewCCMask;
2445 }
2446 
2447 // See whether the comparison argument contains a redundant AND
2448 // and remove it if so.  This sometimes happens due to the generic
2449 // BRCOND expansion.
2450 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2451                                   Comparison &C) {
2452   if (C.Op0.getOpcode() != ISD::AND)
2453     return;
2454   auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2455   if (!Mask)
2456     return;
2457   KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2458   if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2459     return;
2460 
2461   C.Op0 = C.Op0.getOperand(0);
2462 }
2463 
2464 // Return a Comparison that tests the condition-code result of intrinsic
2465 // node Call against constant integer CC using comparison code Cond.
2466 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2467 // and CCValid is the set of possible condition-code results.
2468 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2469                                   SDValue Call, unsigned CCValid, uint64_t CC,
2470                                   ISD::CondCode Cond) {
2471   Comparison C(Call, SDValue(), SDValue());
2472   C.Opcode = Opcode;
2473   C.CCValid = CCValid;
2474   if (Cond == ISD::SETEQ)
2475     // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2476     C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2477   else if (Cond == ISD::SETNE)
2478     // ...and the inverse of that.
2479     C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2480   else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2481     // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2482     // always true for CC>3.
2483     C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2484   else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2485     // ...and the inverse of that.
2486     C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2487   else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2488     // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2489     // always true for CC>3.
2490     C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2491   else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2492     // ...and the inverse of that.
2493     C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2494   else
2495     llvm_unreachable("Unexpected integer comparison type");
2496   C.CCMask &= CCValid;
2497   return C;
2498 }
2499 
2500 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
2501 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2502                          ISD::CondCode Cond, const SDLoc &DL,
2503                          SDValue Chain = SDValue(),
2504                          bool IsSignaling = false) {
2505   if (CmpOp1.getOpcode() == ISD::Constant) {
2506     assert(!Chain);
2507     uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2508     unsigned Opcode, CCValid;
2509     if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2510         CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2511         isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2512       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2513     if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2514         CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2515         isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2516       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2517   }
2518   Comparison C(CmpOp0, CmpOp1, Chain);
2519   C.CCMask = CCMaskForCondCode(Cond);
2520   if (C.Op0.getValueType().isFloatingPoint()) {
2521     C.CCValid = SystemZ::CCMASK_FCMP;
2522     if (!C.Chain)
2523       C.Opcode = SystemZISD::FCMP;
2524     else if (!IsSignaling)
2525       C.Opcode = SystemZISD::STRICT_FCMP;
2526     else
2527       C.Opcode = SystemZISD::STRICT_FCMPS;
2528     adjustForFNeg(C);
2529   } else {
2530     assert(!C.Chain);
2531     C.CCValid = SystemZ::CCMASK_ICMP;
2532     C.Opcode = SystemZISD::ICMP;
2533     // Choose the type of comparison.  Equality and inequality tests can
2534     // use either signed or unsigned comparisons.  The choice also doesn't
2535     // matter if both sign bits are known to be clear.  In those cases we
2536     // want to give the main isel code the freedom to choose whichever
2537     // form fits best.
2538     if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2539         C.CCMask == SystemZ::CCMASK_CMP_NE ||
2540         (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2541       C.ICmpType = SystemZICMP::Any;
2542     else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2543       C.ICmpType = SystemZICMP::UnsignedOnly;
2544     else
2545       C.ICmpType = SystemZICMP::SignedOnly;
2546     C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2547     adjustForRedundantAnd(DAG, DL, C);
2548     adjustZeroCmp(DAG, DL, C);
2549     adjustSubwordCmp(DAG, DL, C);
2550     adjustForSubtraction(DAG, DL, C);
2551     adjustForLTGFR(C);
2552     adjustICmpTruncate(DAG, DL, C);
2553   }
2554 
2555   if (shouldSwapCmpOperands(C)) {
2556     std::swap(C.Op0, C.Op1);
2557     C.CCMask = reverseCCMask(C.CCMask);
2558   }
2559 
2560   adjustForTestUnderMask(DAG, DL, C);
2561   return C;
2562 }
2563 
2564 // Emit the comparison instruction described by C.
2565 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2566   if (!C.Op1.getNode()) {
2567     SDNode *Node;
2568     switch (C.Op0.getOpcode()) {
2569     case ISD::INTRINSIC_W_CHAIN:
2570       Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2571       return SDValue(Node, 0);
2572     case ISD::INTRINSIC_WO_CHAIN:
2573       Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2574       return SDValue(Node, Node->getNumValues() - 1);
2575     default:
2576       llvm_unreachable("Invalid comparison operands");
2577     }
2578   }
2579   if (C.Opcode == SystemZISD::ICMP)
2580     return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2581                        DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2582   if (C.Opcode == SystemZISD::TM) {
2583     bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2584                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2585     return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2586                        DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2587   }
2588   if (C.Chain) {
2589     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2590     return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2591   }
2592   return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2593 }
2594 
2595 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2596 // 64 bits.  Extend is the extension type to use.  Store the high part
2597 // in Hi and the low part in Lo.
2598 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2599                             SDValue Op0, SDValue Op1, SDValue &Hi,
2600                             SDValue &Lo) {
2601   Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2602   Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2603   SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2604   Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2605                    DAG.getConstant(32, DL, MVT::i64));
2606   Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2607   Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2608 }
2609 
2610 // Lower a binary operation that produces two VT results, one in each
2611 // half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
2612 // and Opcode performs the GR128 operation.  Store the even register result
2613 // in Even and the odd register result in Odd.
2614 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2615                              unsigned Opcode, SDValue Op0, SDValue Op1,
2616                              SDValue &Even, SDValue &Odd) {
2617   SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2618   bool Is32Bit = is32Bit(VT);
2619   Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2620   Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2621 }
2622 
2623 // Return an i32 value that is 1 if the CC value produced by CCReg is
2624 // in the mask CCMask and 0 otherwise.  CC is known to have a value
2625 // in CCValid, so other values can be ignored.
2626 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2627                          unsigned CCValid, unsigned CCMask) {
2628   SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2629                    DAG.getConstant(0, DL, MVT::i32),
2630                    DAG.getTargetConstant(CCValid, DL, MVT::i32),
2631                    DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2632   return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2633 }
2634 
2635 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
2636 // be done directly.  Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2637 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2638 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2639 // floating-point comparisons.
2640 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2641 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2642   switch (CC) {
2643   case ISD::SETOEQ:
2644   case ISD::SETEQ:
2645     switch (Mode) {
2646     case CmpMode::Int:         return SystemZISD::VICMPE;
2647     case CmpMode::FP:          return SystemZISD::VFCMPE;
2648     case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPE;
2649     case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2650     }
2651     llvm_unreachable("Bad mode");
2652 
2653   case ISD::SETOGE:
2654   case ISD::SETGE:
2655     switch (Mode) {
2656     case CmpMode::Int:         return 0;
2657     case CmpMode::FP:          return SystemZISD::VFCMPHE;
2658     case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPHE;
2659     case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2660     }
2661     llvm_unreachable("Bad mode");
2662 
2663   case ISD::SETOGT:
2664   case ISD::SETGT:
2665     switch (Mode) {
2666     case CmpMode::Int:         return SystemZISD::VICMPH;
2667     case CmpMode::FP:          return SystemZISD::VFCMPH;
2668     case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPH;
2669     case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2670     }
2671     llvm_unreachable("Bad mode");
2672 
2673   case ISD::SETUGT:
2674     switch (Mode) {
2675     case CmpMode::Int:         return SystemZISD::VICMPHL;
2676     case CmpMode::FP:          return 0;
2677     case CmpMode::StrictFP:    return 0;
2678     case CmpMode::SignalingFP: return 0;
2679     }
2680     llvm_unreachable("Bad mode");
2681 
2682   default:
2683     return 0;
2684   }
2685 }
2686 
2687 // Return the SystemZISD vector comparison operation for CC or its inverse,
2688 // or 0 if neither can be done directly.  Indicate in Invert whether the
2689 // result is for the inverse of CC.  Mode is as above.
2690 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2691                                             bool &Invert) {
2692   if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2693     Invert = false;
2694     return Opcode;
2695   }
2696 
2697   CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int);
2698   if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2699     Invert = true;
2700     return Opcode;
2701   }
2702 
2703   return 0;
2704 }
2705 
2706 // Return a v2f64 that contains the extended form of elements Start and Start+1
2707 // of v4f32 value Op.  If Chain is nonnull, return the strict form.
2708 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2709                                   SDValue Op, SDValue Chain) {
2710   int Mask[] = { Start, -1, Start + 1, -1 };
2711   Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2712   if (Chain) {
2713     SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2714     return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2715   }
2716   return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2717 }
2718 
2719 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2720 // producing a result of type VT.  If Chain is nonnull, return the strict form.
2721 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2722                                             const SDLoc &DL, EVT VT,
2723                                             SDValue CmpOp0,
2724                                             SDValue CmpOp1,
2725                                             SDValue Chain) const {
2726   // There is no hardware support for v4f32 (unless we have the vector
2727   // enhancements facility 1), so extend the vector into two v2f64s
2728   // and compare those.
2729   if (CmpOp0.getValueType() == MVT::v4f32 &&
2730       !Subtarget.hasVectorEnhancements1()) {
2731     SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2732     SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2733     SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2734     SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2735     if (Chain) {
2736       SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2737       SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2738       SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2739       SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2740       SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2741                             H1.getValue(1), L1.getValue(1),
2742                             HRes.getValue(1), LRes.getValue(1) };
2743       SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2744       SDValue Ops[2] = { Res, NewChain };
2745       return DAG.getMergeValues(Ops, DL);
2746     }
2747     SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2748     SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2749     return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2750   }
2751   if (Chain) {
2752     SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2753     return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2754   }
2755   return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2756 }
2757 
2758 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2759 // an integer mask of type VT.  If Chain is nonnull, we have a strict
2760 // floating-point comparison.  If in addition IsSignaling is true, we have
2761 // a strict signaling floating-point comparison.
2762 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2763                                                 const SDLoc &DL, EVT VT,
2764                                                 ISD::CondCode CC,
2765                                                 SDValue CmpOp0,
2766                                                 SDValue CmpOp1,
2767                                                 SDValue Chain,
2768                                                 bool IsSignaling) const {
2769   bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2770   assert (!Chain || IsFP);
2771   assert (!IsSignaling || Chain);
2772   CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2773                  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2774   bool Invert = false;
2775   SDValue Cmp;
2776   switch (CC) {
2777     // Handle tests for order using (or (ogt y x) (oge x y)).
2778   case ISD::SETUO:
2779     Invert = true;
2780     LLVM_FALLTHROUGH;
2781   case ISD::SETO: {
2782     assert(IsFP && "Unexpected integer comparison");
2783     SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2784                               DL, VT, CmpOp1, CmpOp0, Chain);
2785     SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2786                               DL, VT, CmpOp0, CmpOp1, Chain);
2787     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2788     if (Chain)
2789       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2790                           LT.getValue(1), GE.getValue(1));
2791     break;
2792   }
2793 
2794     // Handle <> tests using (or (ogt y x) (ogt x y)).
2795   case ISD::SETUEQ:
2796     Invert = true;
2797     LLVM_FALLTHROUGH;
2798   case ISD::SETONE: {
2799     assert(IsFP && "Unexpected integer comparison");
2800     SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2801                               DL, VT, CmpOp1, CmpOp0, Chain);
2802     SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2803                               DL, VT, CmpOp0, CmpOp1, Chain);
2804     Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2805     if (Chain)
2806       Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2807                           LT.getValue(1), GT.getValue(1));
2808     break;
2809   }
2810 
2811     // Otherwise a single comparison is enough.  It doesn't really
2812     // matter whether we try the inversion or the swap first, since
2813     // there are no cases where both work.
2814   default:
2815     if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2816       Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2817     else {
2818       CC = ISD::getSetCCSwappedOperands(CC);
2819       if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2820         Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2821       else
2822         llvm_unreachable("Unhandled comparison");
2823     }
2824     if (Chain)
2825       Chain = Cmp.getValue(1);
2826     break;
2827   }
2828   if (Invert) {
2829     SDValue Mask =
2830       DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
2831     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2832   }
2833   if (Chain && Chain.getNode() != Cmp.getNode()) {
2834     SDValue Ops[2] = { Cmp, Chain };
2835     Cmp = DAG.getMergeValues(Ops, DL);
2836   }
2837   return Cmp;
2838 }
2839 
2840 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2841                                           SelectionDAG &DAG) const {
2842   SDValue CmpOp0   = Op.getOperand(0);
2843   SDValue CmpOp1   = Op.getOperand(1);
2844   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2845   SDLoc DL(Op);
2846   EVT VT = Op.getValueType();
2847   if (VT.isVector())
2848     return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2849 
2850   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2851   SDValue CCReg = emitCmp(DAG, DL, C);
2852   return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2853 }
2854 
2855 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
2856                                                   SelectionDAG &DAG,
2857                                                   bool IsSignaling) const {
2858   SDValue Chain    = Op.getOperand(0);
2859   SDValue CmpOp0   = Op.getOperand(1);
2860   SDValue CmpOp1   = Op.getOperand(2);
2861   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
2862   SDLoc DL(Op);
2863   EVT VT = Op.getNode()->getValueType(0);
2864   if (VT.isVector()) {
2865     SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
2866                                    Chain, IsSignaling);
2867     return Res.getValue(Op.getResNo());
2868   }
2869 
2870   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
2871   SDValue CCReg = emitCmp(DAG, DL, C);
2872   CCReg->setFlags(Op->getFlags());
2873   SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2874   SDValue Ops[2] = { Result, CCReg.getValue(1) };
2875   return DAG.getMergeValues(Ops, DL);
2876 }
2877 
2878 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2879   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2880   SDValue CmpOp0   = Op.getOperand(2);
2881   SDValue CmpOp1   = Op.getOperand(3);
2882   SDValue Dest     = Op.getOperand(4);
2883   SDLoc DL(Op);
2884 
2885   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2886   SDValue CCReg = emitCmp(DAG, DL, C);
2887   return DAG.getNode(
2888       SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
2889       DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
2890       DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
2891 }
2892 
2893 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2894 // allowing Pos and Neg to be wider than CmpOp.
2895 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2896   return (Neg.getOpcode() == ISD::SUB &&
2897           Neg.getOperand(0).getOpcode() == ISD::Constant &&
2898           cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2899           Neg.getOperand(1) == Pos &&
2900           (Pos == CmpOp ||
2901            (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2902             Pos.getOperand(0) == CmpOp)));
2903 }
2904 
2905 // Return the absolute or negative absolute of Op; IsNegative decides which.
2906 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2907                            bool IsNegative) {
2908   Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2909   if (IsNegative)
2910     Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2911                      DAG.getConstant(0, DL, Op.getValueType()), Op);
2912   return Op;
2913 }
2914 
2915 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2916                                               SelectionDAG &DAG) const {
2917   SDValue CmpOp0   = Op.getOperand(0);
2918   SDValue CmpOp1   = Op.getOperand(1);
2919   SDValue TrueOp   = Op.getOperand(2);
2920   SDValue FalseOp  = Op.getOperand(3);
2921   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2922   SDLoc DL(Op);
2923 
2924   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2925 
2926   // Check for absolute and negative-absolute selections, including those
2927   // where the comparison value is sign-extended (for LPGFR and LNGFR).
2928   // This check supplements the one in DAGCombiner.
2929   if (C.Opcode == SystemZISD::ICMP &&
2930       C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2931       C.CCMask != SystemZ::CCMASK_CMP_NE &&
2932       C.Op1.getOpcode() == ISD::Constant &&
2933       cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2934     if (isAbsolute(C.Op0, TrueOp, FalseOp))
2935       return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2936     if (isAbsolute(C.Op0, FalseOp, TrueOp))
2937       return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2938   }
2939 
2940   SDValue CCReg = emitCmp(DAG, DL, C);
2941   SDValue Ops[] = {TrueOp, FalseOp,
2942                    DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
2943                    DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
2944 
2945   return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
2946 }
2947 
2948 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2949                                                   SelectionDAG &DAG) const {
2950   SDLoc DL(Node);
2951   const GlobalValue *GV = Node->getGlobal();
2952   int64_t Offset = Node->getOffset();
2953   EVT PtrVT = getPointerTy(DAG.getDataLayout());
2954   CodeModel::Model CM = DAG.getTarget().getCodeModel();
2955 
2956   SDValue Result;
2957   if (Subtarget.isPC32DBLSymbol(GV, CM)) {
2958     if (isInt<32>(Offset)) {
2959       // Assign anchors at 1<<12 byte boundaries.
2960       uint64_t Anchor = Offset & ~uint64_t(0xfff);
2961       Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2962       Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2963 
2964       // The offset can be folded into the address if it is aligned to a
2965       // halfword.
2966       Offset -= Anchor;
2967       if (Offset != 0 && (Offset & 1) == 0) {
2968         SDValue Full =
2969           DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2970         Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
2971         Offset = 0;
2972       }
2973     } else {
2974       // Conservatively load a constant offset greater than 32 bits into a
2975       // register below.
2976       Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
2977       Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2978     }
2979   } else {
2980     Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2981     Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2982     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
2983                          MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2984   }
2985 
2986   // If there was a non-zero offset that we didn't fold, create an explicit
2987   // addition for it.
2988   if (Offset != 0)
2989     Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
2990                          DAG.getConstant(Offset, DL, PtrVT));
2991 
2992   return Result;
2993 }
2994 
2995 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
2996                                                  SelectionDAG &DAG,
2997                                                  unsigned Opcode,
2998                                                  SDValue GOTOffset) const {
2999   SDLoc DL(Node);
3000   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3001   SDValue Chain = DAG.getEntryNode();
3002   SDValue Glue;
3003 
3004   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3005       CallingConv::GHC)
3006     report_fatal_error("In GHC calling convention TLS is not supported");
3007 
3008   // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3009   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3010   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3011   Glue = Chain.getValue(1);
3012   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3013   Glue = Chain.getValue(1);
3014 
3015   // The first call operand is the chain and the second is the TLS symbol.
3016   SmallVector<SDValue, 8> Ops;
3017   Ops.push_back(Chain);
3018   Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3019                                            Node->getValueType(0),
3020                                            0, 0));
3021 
3022   // Add argument registers to the end of the list so that they are
3023   // known live into the call.
3024   Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3025   Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3026 
3027   // Add a register mask operand representing the call-preserved registers.
3028   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3029   const uint32_t *Mask =
3030       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3031   assert(Mask && "Missing call preserved mask for calling convention");
3032   Ops.push_back(DAG.getRegisterMask(Mask));
3033 
3034   // Glue the call to the argument copies.
3035   Ops.push_back(Glue);
3036 
3037   // Emit the call.
3038   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3039   Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3040   Glue = Chain.getValue(1);
3041 
3042   // Copy the return value from %r2.
3043   return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3044 }
3045 
3046 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3047                                                   SelectionDAG &DAG) const {
3048   SDValue Chain = DAG.getEntryNode();
3049   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3050 
3051   // The high part of the thread pointer is in access register 0.
3052   SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3053   TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3054 
3055   // The low part of the thread pointer is in access register 1.
3056   SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3057   TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3058 
3059   // Merge them into a single 64-bit address.
3060   SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3061                                     DAG.getConstant(32, DL, PtrVT));
3062   return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3063 }
3064 
3065 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3066                                                      SelectionDAG &DAG) const {
3067   if (DAG.getTarget().useEmulatedTLS())
3068     return LowerToTLSEmulatedModel(Node, DAG);
3069   SDLoc DL(Node);
3070   const GlobalValue *GV = Node->getGlobal();
3071   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3072   TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3073 
3074   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3075       CallingConv::GHC)
3076     report_fatal_error("In GHC calling convention TLS is not supported");
3077 
3078   SDValue TP = lowerThreadPointer(DL, DAG);
3079 
3080   // Get the offset of GA from the thread pointer, based on the TLS model.
3081   SDValue Offset;
3082   switch (model) {
3083     case TLSModel::GeneralDynamic: {
3084       // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3085       SystemZConstantPoolValue *CPV =
3086         SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3087 
3088       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
3089       Offset = DAG.getLoad(
3090           PtrVT, DL, DAG.getEntryNode(), Offset,
3091           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3092 
3093       // Call __tls_get_offset to retrieve the offset.
3094       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3095       break;
3096     }
3097 
3098     case TLSModel::LocalDynamic: {
3099       // Load the GOT offset of the module ID.
3100       SystemZConstantPoolValue *CPV =
3101         SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3102 
3103       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
3104       Offset = DAG.getLoad(
3105           PtrVT, DL, DAG.getEntryNode(), Offset,
3106           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3107 
3108       // Call __tls_get_offset to retrieve the module base offset.
3109       Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3110 
3111       // Note: The SystemZLDCleanupPass will remove redundant computations
3112       // of the module base offset.  Count total number of local-dynamic
3113       // accesses to trigger execution of that pass.
3114       SystemZMachineFunctionInfo* MFI =
3115         DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3116       MFI->incNumLocalDynamicTLSAccesses();
3117 
3118       // Add the per-symbol offset.
3119       CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3120 
3121       SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
3122       DTPOffset = DAG.getLoad(
3123           PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3124           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3125 
3126       Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3127       break;
3128     }
3129 
3130     case TLSModel::InitialExec: {
3131       // Load the offset from the GOT.
3132       Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3133                                           SystemZII::MO_INDNTPOFF);
3134       Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3135       Offset =
3136           DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3137                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3138       break;
3139     }
3140 
3141     case TLSModel::LocalExec: {
3142       // Force the offset into the constant pool and load it from there.
3143       SystemZConstantPoolValue *CPV =
3144         SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3145 
3146       Offset = DAG.getConstantPool(CPV, PtrVT, 8);
3147       Offset = DAG.getLoad(
3148           PtrVT, DL, DAG.getEntryNode(), Offset,
3149           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3150       break;
3151     }
3152   }
3153 
3154   // Add the base and offset together.
3155   return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3156 }
3157 
3158 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3159                                                  SelectionDAG &DAG) const {
3160   SDLoc DL(Node);
3161   const BlockAddress *BA = Node->getBlockAddress();
3162   int64_t Offset = Node->getOffset();
3163   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3164 
3165   SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3166   Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3167   return Result;
3168 }
3169 
3170 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3171                                               SelectionDAG &DAG) const {
3172   SDLoc DL(JT);
3173   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3174   SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3175 
3176   // Use LARL to load the address of the table.
3177   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3178 }
3179 
3180 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3181                                                  SelectionDAG &DAG) const {
3182   SDLoc DL(CP);
3183   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3184 
3185   SDValue Result;
3186   if (CP->isMachineConstantPoolEntry())
3187     Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
3188                                        CP->getAlignment());
3189   else
3190     Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
3191                                        CP->getAlignment(), CP->getOffset());
3192 
3193   // Use LARL to load the address of the constant pool entry.
3194   return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3195 }
3196 
3197 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3198                                               SelectionDAG &DAG) const {
3199   MachineFunction &MF = DAG.getMachineFunction();
3200   MachineFrameInfo &MFI = MF.getFrameInfo();
3201   MFI.setFrameAddressIsTaken(true);
3202 
3203   SDLoc DL(Op);
3204   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3205   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3206 
3207   // If the back chain frame index has not been allocated yet, do so.
3208   SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
3209   int BackChainIdx = FI->getFramePointerSaveIndex();
3210   if (!BackChainIdx) {
3211     // By definition, the frame address is the address of the back chain.
3212     BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
3213     FI->setFramePointerSaveIndex(BackChainIdx);
3214   }
3215   SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3216 
3217   // FIXME The frontend should detect this case.
3218   if (Depth > 0) {
3219     report_fatal_error("Unsupported stack frame traversal count");
3220   }
3221 
3222   return BackChain;
3223 }
3224 
3225 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3226                                                SelectionDAG &DAG) const {
3227   MachineFunction &MF = DAG.getMachineFunction();
3228   MachineFrameInfo &MFI = MF.getFrameInfo();
3229   MFI.setReturnAddressIsTaken(true);
3230 
3231   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3232     return SDValue();
3233 
3234   SDLoc DL(Op);
3235   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3236   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3237 
3238   // FIXME The frontend should detect this case.
3239   if (Depth > 0) {
3240     report_fatal_error("Unsupported stack frame traversal count");
3241   }
3242 
3243   // Return R14D, which has the return address. Mark it an implicit live-in.
3244   unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3245   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3246 }
3247 
3248 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3249                                             SelectionDAG &DAG) const {
3250   SDLoc DL(Op);
3251   SDValue In = Op.getOperand(0);
3252   EVT InVT = In.getValueType();
3253   EVT ResVT = Op.getValueType();
3254 
3255   // Convert loads directly.  This is normally done by DAGCombiner,
3256   // but we need this case for bitcasts that are created during lowering
3257   // and which are then lowered themselves.
3258   if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3259     if (ISD::isNormalLoad(LoadN)) {
3260       SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3261                                     LoadN->getBasePtr(), LoadN->getMemOperand());
3262       // Update the chain uses.
3263       DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3264       return NewLoad;
3265     }
3266 
3267   if (InVT == MVT::i32 && ResVT == MVT::f32) {
3268     SDValue In64;
3269     if (Subtarget.hasHighWord()) {
3270       SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3271                                        MVT::i64);
3272       In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3273                                        MVT::i64, SDValue(U64, 0), In);
3274     } else {
3275       In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3276       In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3277                          DAG.getConstant(32, DL, MVT::i64));
3278     }
3279     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3280     return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3281                                       DL, MVT::f32, Out64);
3282   }
3283   if (InVT == MVT::f32 && ResVT == MVT::i32) {
3284     SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3285     SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3286                                              MVT::f64, SDValue(U64, 0), In);
3287     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3288     if (Subtarget.hasHighWord())
3289       return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3290                                         MVT::i32, Out64);
3291     SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3292                                 DAG.getConstant(32, DL, MVT::i64));
3293     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3294   }
3295   llvm_unreachable("Unexpected bitcast combination");
3296 }
3297 
3298 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3299                                             SelectionDAG &DAG) const {
3300   MachineFunction &MF = DAG.getMachineFunction();
3301   SystemZMachineFunctionInfo *FuncInfo =
3302     MF.getInfo<SystemZMachineFunctionInfo>();
3303   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3304 
3305   SDValue Chain   = Op.getOperand(0);
3306   SDValue Addr    = Op.getOperand(1);
3307   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3308   SDLoc DL(Op);
3309 
3310   // The initial values of each field.
3311   const unsigned NumFields = 4;
3312   SDValue Fields[NumFields] = {
3313     DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3314     DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3315     DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3316     DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3317   };
3318 
3319   // Store each field into its respective slot.
3320   SDValue MemOps[NumFields];
3321   unsigned Offset = 0;
3322   for (unsigned I = 0; I < NumFields; ++I) {
3323     SDValue FieldAddr = Addr;
3324     if (Offset != 0)
3325       FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3326                               DAG.getIntPtrConstant(Offset, DL));
3327     MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3328                              MachinePointerInfo(SV, Offset));
3329     Offset += 8;
3330   }
3331   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3332 }
3333 
3334 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3335                                            SelectionDAG &DAG) const {
3336   SDValue Chain      = Op.getOperand(0);
3337   SDValue DstPtr     = Op.getOperand(1);
3338   SDValue SrcPtr     = Op.getOperand(2);
3339   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3340   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3341   SDLoc DL(Op);
3342 
3343   return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3344                        /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
3345                        /*isTailCall*/false,
3346                        MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
3347 }
3348 
3349 SDValue SystemZTargetLowering::
3350 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3351   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3352   MachineFunction &MF = DAG.getMachineFunction();
3353   bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3354   bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3355 
3356   SDValue Chain = Op.getOperand(0);
3357   SDValue Size  = Op.getOperand(1);
3358   SDValue Align = Op.getOperand(2);
3359   SDLoc DL(Op);
3360 
3361   // If user has set the no alignment function attribute, ignore
3362   // alloca alignments.
3363   uint64_t AlignVal = (RealignOpt ?
3364                        dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3365 
3366   uint64_t StackAlign = TFI->getStackAlignment();
3367   uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3368   uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3369 
3370   unsigned SPReg = getStackPointerRegisterToSaveRestore();
3371   SDValue NeededSpace = Size;
3372 
3373   // Get a reference to the stack pointer.
3374   SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3375 
3376   // If we need a backchain, save it now.
3377   SDValue Backchain;
3378   if (StoreBackchain)
3379     Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
3380 
3381   // Add extra space for alignment if needed.
3382   if (ExtraAlignSpace)
3383     NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3384                               DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3385 
3386   // Get the new stack pointer value.
3387   SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3388 
3389   // Copy the new stack pointer back.
3390   Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3391 
3392   // The allocated data lives above the 160 bytes allocated for the standard
3393   // frame, plus any outgoing stack arguments.  We don't know how much that
3394   // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3395   SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3396   SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3397 
3398   // Dynamically realign if needed.
3399   if (RequiredAlign > StackAlign) {
3400     Result =
3401       DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3402                   DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3403     Result =
3404       DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3405                   DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3406   }
3407 
3408   if (StoreBackchain)
3409     Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
3410 
3411   SDValue Ops[2] = { Result, Chain };
3412   return DAG.getMergeValues(Ops, DL);
3413 }
3414 
3415 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3416     SDValue Op, SelectionDAG &DAG) const {
3417   SDLoc DL(Op);
3418 
3419   return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3420 }
3421 
3422 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3423                                               SelectionDAG &DAG) const {
3424   EVT VT = Op.getValueType();
3425   SDLoc DL(Op);
3426   SDValue Ops[2];
3427   if (is32Bit(VT))
3428     // Just do a normal 64-bit multiplication and extract the results.
3429     // We define this so that it can be used for constant division.
3430     lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3431                     Op.getOperand(1), Ops[1], Ops[0]);
3432   else if (Subtarget.hasMiscellaneousExtensions2())
3433     // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3434     // the high result in the even register.  ISD::SMUL_LOHI is defined to
3435     // return the low half first, so the results are in reverse order.
3436     lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3437                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3438   else {
3439     // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3440     //
3441     //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3442     //
3443     // but using the fact that the upper halves are either all zeros
3444     // or all ones:
3445     //
3446     //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3447     //
3448     // and grouping the right terms together since they are quicker than the
3449     // multiplication:
3450     //
3451     //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3452     SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3453     SDValue LL = Op.getOperand(0);
3454     SDValue RL = Op.getOperand(1);
3455     SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3456     SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3457     // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3458     // the high result in the even register.  ISD::SMUL_LOHI is defined to
3459     // return the low half first, so the results are in reverse order.
3460     lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3461                      LL, RL, Ops[1], Ops[0]);
3462     SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3463     SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3464     SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3465     Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3466   }
3467   return DAG.getMergeValues(Ops, DL);
3468 }
3469 
3470 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3471                                               SelectionDAG &DAG) const {
3472   EVT VT = Op.getValueType();
3473   SDLoc DL(Op);
3474   SDValue Ops[2];
3475   if (is32Bit(VT))
3476     // Just do a normal 64-bit multiplication and extract the results.
3477     // We define this so that it can be used for constant division.
3478     lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3479                     Op.getOperand(1), Ops[1], Ops[0]);
3480   else
3481     // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3482     // the high result in the even register.  ISD::UMUL_LOHI is defined to
3483     // return the low half first, so the results are in reverse order.
3484     lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3485                      Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3486   return DAG.getMergeValues(Ops, DL);
3487 }
3488 
3489 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3490                                             SelectionDAG &DAG) const {
3491   SDValue Op0 = Op.getOperand(0);
3492   SDValue Op1 = Op.getOperand(1);
3493   EVT VT = Op.getValueType();
3494   SDLoc DL(Op);
3495 
3496   // We use DSGF for 32-bit division.  This means the first operand must
3497   // always be 64-bit, and the second operand should be 32-bit whenever
3498   // that is possible, to improve performance.
3499   if (is32Bit(VT))
3500     Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3501   else if (DAG.ComputeNumSignBits(Op1) > 32)
3502     Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3503 
3504   // DSG(F) returns the remainder in the even register and the
3505   // quotient in the odd register.
3506   SDValue Ops[2];
3507   lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3508   return DAG.getMergeValues(Ops, DL);
3509 }
3510 
3511 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3512                                             SelectionDAG &DAG) const {
3513   EVT VT = Op.getValueType();
3514   SDLoc DL(Op);
3515 
3516   // DL(G) returns the remainder in the even register and the
3517   // quotient in the odd register.
3518   SDValue Ops[2];
3519   lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3520                    Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3521   return DAG.getMergeValues(Ops, DL);
3522 }
3523 
3524 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3525   assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3526 
3527   // Get the known-zero masks for each operand.
3528   SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3529   KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3530                         DAG.computeKnownBits(Ops[1])};
3531 
3532   // See if the upper 32 bits of one operand and the lower 32 bits of the
3533   // other are known zero.  They are the low and high operands respectively.
3534   uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3535                        Known[1].Zero.getZExtValue() };
3536   unsigned High, Low;
3537   if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3538     High = 1, Low = 0;
3539   else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3540     High = 0, Low = 1;
3541   else
3542     return Op;
3543 
3544   SDValue LowOp = Ops[Low];
3545   SDValue HighOp = Ops[High];
3546 
3547   // If the high part is a constant, we're better off using IILH.
3548   if (HighOp.getOpcode() == ISD::Constant)
3549     return Op;
3550 
3551   // If the low part is a constant that is outside the range of LHI,
3552   // then we're better off using IILF.
3553   if (LowOp.getOpcode() == ISD::Constant) {
3554     int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3555     if (!isInt<16>(Value))
3556       return Op;
3557   }
3558 
3559   // Check whether the high part is an AND that doesn't change the
3560   // high 32 bits and just masks out low bits.  We can skip it if so.
3561   if (HighOp.getOpcode() == ISD::AND &&
3562       HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3563     SDValue HighOp0 = HighOp.getOperand(0);
3564     uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3565     if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3566       HighOp = HighOp0;
3567   }
3568 
3569   // Take advantage of the fact that all GR32 operations only change the
3570   // low 32 bits by truncating Low to an i32 and inserting it directly
3571   // using a subreg.  The interesting cases are those where the truncation
3572   // can be folded.
3573   SDLoc DL(Op);
3574   SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3575   return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3576                                    MVT::i64, HighOp, Low32);
3577 }
3578 
3579 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3580 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3581                                           SelectionDAG &DAG) const {
3582   SDNode *N = Op.getNode();
3583   SDValue LHS = N->getOperand(0);
3584   SDValue RHS = N->getOperand(1);
3585   SDLoc DL(N);
3586   unsigned BaseOp = 0;
3587   unsigned CCValid = 0;
3588   unsigned CCMask = 0;
3589 
3590   switch (Op.getOpcode()) {
3591   default: llvm_unreachable("Unknown instruction!");
3592   case ISD::SADDO:
3593     BaseOp = SystemZISD::SADDO;
3594     CCValid = SystemZ::CCMASK_ARITH;
3595     CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3596     break;
3597   case ISD::SSUBO:
3598     BaseOp = SystemZISD::SSUBO;
3599     CCValid = SystemZ::CCMASK_ARITH;
3600     CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3601     break;
3602   case ISD::UADDO:
3603     BaseOp = SystemZISD::UADDO;
3604     CCValid = SystemZ::CCMASK_LOGICAL;
3605     CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3606     break;
3607   case ISD::USUBO:
3608     BaseOp = SystemZISD::USUBO;
3609     CCValid = SystemZ::CCMASK_LOGICAL;
3610     CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3611     break;
3612   }
3613 
3614   SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3615   SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3616 
3617   SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3618   if (N->getValueType(1) == MVT::i1)
3619     SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3620 
3621   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3622 }
3623 
3624 static bool isAddCarryChain(SDValue Carry) {
3625   while (Carry.getOpcode() == ISD::ADDCARRY)
3626     Carry = Carry.getOperand(2);
3627   return Carry.getOpcode() == ISD::UADDO;
3628 }
3629 
3630 static bool isSubBorrowChain(SDValue Carry) {
3631   while (Carry.getOpcode() == ISD::SUBCARRY)
3632     Carry = Carry.getOperand(2);
3633   return Carry.getOpcode() == ISD::USUBO;
3634 }
3635 
3636 // Lower ADDCARRY/SUBCARRY nodes.
3637 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3638                                                 SelectionDAG &DAG) const {
3639 
3640   SDNode *N = Op.getNode();
3641   MVT VT = N->getSimpleValueType(0);
3642 
3643   // Let legalize expand this if it isn't a legal type yet.
3644   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3645     return SDValue();
3646 
3647   SDValue LHS = N->getOperand(0);
3648   SDValue RHS = N->getOperand(1);
3649   SDValue Carry = Op.getOperand(2);
3650   SDLoc DL(N);
3651   unsigned BaseOp = 0;
3652   unsigned CCValid = 0;
3653   unsigned CCMask = 0;
3654 
3655   switch (Op.getOpcode()) {
3656   default: llvm_unreachable("Unknown instruction!");
3657   case ISD::ADDCARRY:
3658     if (!isAddCarryChain(Carry))
3659       return SDValue();
3660 
3661     BaseOp = SystemZISD::ADDCARRY;
3662     CCValid = SystemZ::CCMASK_LOGICAL;
3663     CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3664     break;
3665   case ISD::SUBCARRY:
3666     if (!isSubBorrowChain(Carry))
3667       return SDValue();
3668 
3669     BaseOp = SystemZISD::SUBCARRY;
3670     CCValid = SystemZ::CCMASK_LOGICAL;
3671     CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3672     break;
3673   }
3674 
3675   // Set the condition code from the carry flag.
3676   Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3677                       DAG.getConstant(CCValid, DL, MVT::i32),
3678                       DAG.getConstant(CCMask, DL, MVT::i32));
3679 
3680   SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3681   SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3682 
3683   SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3684   if (N->getValueType(1) == MVT::i1)
3685     SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3686 
3687   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3688 }
3689 
3690 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3691                                           SelectionDAG &DAG) const {
3692   EVT VT = Op.getValueType();
3693   SDLoc DL(Op);
3694   Op = Op.getOperand(0);
3695 
3696   // Handle vector types via VPOPCT.
3697   if (VT.isVector()) {
3698     Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3699     Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3700     switch (VT.getScalarSizeInBits()) {
3701     case 8:
3702       break;
3703     case 16: {
3704       Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3705       SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3706       SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3707       Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3708       Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3709       break;
3710     }
3711     case 32: {
3712       SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3713                                             DAG.getConstant(0, DL, MVT::i32));
3714       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3715       break;
3716     }
3717     case 64: {
3718       SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3719                                             DAG.getConstant(0, DL, MVT::i32));
3720       Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3721       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3722       break;
3723     }
3724     default:
3725       llvm_unreachable("Unexpected type");
3726     }
3727     return Op;
3728   }
3729 
3730   // Get the known-zero mask for the operand.
3731   KnownBits Known = DAG.computeKnownBits(Op);
3732   unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3733   if (NumSignificantBits == 0)
3734     return DAG.getConstant(0, DL, VT);
3735 
3736   // Skip known-zero high parts of the operand.
3737   int64_t OrigBitSize = VT.getSizeInBits();
3738   int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3739   BitSize = std::min(BitSize, OrigBitSize);
3740 
3741   // The POPCNT instruction counts the number of bits in each byte.
3742   Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3743   Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3744   Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3745 
3746   // Add up per-byte counts in a binary tree.  All bits of Op at
3747   // position larger than BitSize remain zero throughout.
3748   for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3749     SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3750     if (BitSize != OrigBitSize)
3751       Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3752                         DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3753     Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3754   }
3755 
3756   // Extract overall result from high byte.
3757   if (BitSize > 8)
3758     Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3759                      DAG.getConstant(BitSize - 8, DL, VT));
3760 
3761   return Op;
3762 }
3763 
3764 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3765                                                  SelectionDAG &DAG) const {
3766   SDLoc DL(Op);
3767   AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3768     cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3769   SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3770     cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3771 
3772   // The only fence that needs an instruction is a sequentially-consistent
3773   // cross-thread fence.
3774   if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3775       FenceSSID == SyncScope::System) {
3776     return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3777                                       Op.getOperand(0)),
3778                    0);
3779   }
3780 
3781   // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3782   return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3783 }
3784 
3785 // Op is an atomic load.  Lower it into a normal volatile load.
3786 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3787                                                 SelectionDAG &DAG) const {
3788   auto *Node = cast<AtomicSDNode>(Op.getNode());
3789   return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3790                         Node->getChain(), Node->getBasePtr(),
3791                         Node->getMemoryVT(), Node->getMemOperand());
3792 }
3793 
3794 // Op is an atomic store.  Lower it into a normal volatile store.
3795 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3796                                                  SelectionDAG &DAG) const {
3797   auto *Node = cast<AtomicSDNode>(Op.getNode());
3798   SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3799                                     Node->getBasePtr(), Node->getMemoryVT(),
3800                                     Node->getMemOperand());
3801   // We have to enforce sequential consistency by performing a
3802   // serialization operation after the store.
3803   if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3804     Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3805                                        MVT::Other, Chain), 0);
3806   return Chain;
3807 }
3808 
3809 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
3810 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3811 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3812                                                    SelectionDAG &DAG,
3813                                                    unsigned Opcode) const {
3814   auto *Node = cast<AtomicSDNode>(Op.getNode());
3815 
3816   // 32-bit operations need no code outside the main loop.
3817   EVT NarrowVT = Node->getMemoryVT();
3818   EVT WideVT = MVT::i32;
3819   if (NarrowVT == WideVT)
3820     return Op;
3821 
3822   int64_t BitSize = NarrowVT.getSizeInBits();
3823   SDValue ChainIn = Node->getChain();
3824   SDValue Addr = Node->getBasePtr();
3825   SDValue Src2 = Node->getVal();
3826   MachineMemOperand *MMO = Node->getMemOperand();
3827   SDLoc DL(Node);
3828   EVT PtrVT = Addr.getValueType();
3829 
3830   // Convert atomic subtracts of constants into additions.
3831   if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3832     if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3833       Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3834       Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3835     }
3836 
3837   // Get the address of the containing word.
3838   SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3839                                     DAG.getConstant(-4, DL, PtrVT));
3840 
3841   // Get the number of bits that the word must be rotated left in order
3842   // to bring the field to the top bits of a GR32.
3843   SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3844                                  DAG.getConstant(3, DL, PtrVT));
3845   BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3846 
3847   // Get the complementing shift amount, for rotating a field in the top
3848   // bits back to its proper position.
3849   SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3850                                     DAG.getConstant(0, DL, WideVT), BitShift);
3851 
3852   // Extend the source operand to 32 bits and prepare it for the inner loop.
3853   // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3854   // operations require the source to be shifted in advance.  (This shift
3855   // can be folded if the source is constant.)  For AND and NAND, the lower
3856   // bits must be set, while for other opcodes they should be left clear.
3857   if (Opcode != SystemZISD::ATOMIC_SWAPW)
3858     Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3859                        DAG.getConstant(32 - BitSize, DL, WideVT));
3860   if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3861       Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3862     Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3863                        DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3864 
3865   // Construct the ATOMIC_LOADW_* node.
3866   SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3867   SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3868                     DAG.getConstant(BitSize, DL, WideVT) };
3869   SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3870                                              NarrowVT, MMO);
3871 
3872   // Rotate the result of the final CS so that the field is in the lower
3873   // bits of a GR32, then truncate it.
3874   SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3875                                     DAG.getConstant(BitSize, DL, WideVT));
3876   SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3877 
3878   SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3879   return DAG.getMergeValues(RetOps, DL);
3880 }
3881 
3882 // Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
3883 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3884 // operations into additions.
3885 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3886                                                     SelectionDAG &DAG) const {
3887   auto *Node = cast<AtomicSDNode>(Op.getNode());
3888   EVT MemVT = Node->getMemoryVT();
3889   if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3890     // A full-width operation.
3891     assert(Op.getValueType() == MemVT && "Mismatched VTs");
3892     SDValue Src2 = Node->getVal();
3893     SDValue NegSrc2;
3894     SDLoc DL(Src2);
3895 
3896     if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3897       // Use an addition if the operand is constant and either LAA(G) is
3898       // available or the negative value is in the range of A(G)FHI.
3899       int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3900       if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3901         NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3902     } else if (Subtarget.hasInterlockedAccess1())
3903       // Use LAA(G) if available.
3904       NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3905                             Src2);
3906 
3907     if (NegSrc2.getNode())
3908       return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3909                            Node->getChain(), Node->getBasePtr(), NegSrc2,
3910                            Node->getMemOperand());
3911 
3912     // Use the node as-is.
3913     return Op;
3914   }
3915 
3916   return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3917 }
3918 
3919 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
3920 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
3921                                                     SelectionDAG &DAG) const {
3922   auto *Node = cast<AtomicSDNode>(Op.getNode());
3923   SDValue ChainIn = Node->getOperand(0);
3924   SDValue Addr = Node->getOperand(1);
3925   SDValue CmpVal = Node->getOperand(2);
3926   SDValue SwapVal = Node->getOperand(3);
3927   MachineMemOperand *MMO = Node->getMemOperand();
3928   SDLoc DL(Node);
3929 
3930   // We have native support for 32-bit and 64-bit compare and swap, but we
3931   // still need to expand extracting the "success" result from the CC.
3932   EVT NarrowVT = Node->getMemoryVT();
3933   EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
3934   if (NarrowVT == WideVT) {
3935     SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3936     SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
3937     SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
3938                                                DL, Tys, Ops, NarrowVT, MMO);
3939     SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3940                                 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
3941 
3942     DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3943     DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3944     DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3945     return SDValue();
3946   }
3947 
3948   // Convert 8-bit and 16-bit compare and swap to a loop, implemented
3949   // via a fullword ATOMIC_CMP_SWAPW operation.
3950   int64_t BitSize = NarrowVT.getSizeInBits();
3951   EVT PtrVT = Addr.getValueType();
3952 
3953   // Get the address of the containing word.
3954   SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3955                                     DAG.getConstant(-4, DL, PtrVT));
3956 
3957   // Get the number of bits that the word must be rotated left in order
3958   // to bring the field to the top bits of a GR32.
3959   SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3960                                  DAG.getConstant(3, DL, PtrVT));
3961   BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3962 
3963   // Get the complementing shift amount, for rotating a field in the top
3964   // bits back to its proper position.
3965   SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3966                                     DAG.getConstant(0, DL, WideVT), BitShift);
3967 
3968   // Construct the ATOMIC_CMP_SWAPW node.
3969   SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
3970   SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
3971                     NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
3972   SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
3973                                              VTList, Ops, NarrowVT, MMO);
3974   SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
3975                               SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
3976 
3977   DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
3978   DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
3979   DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
3980   return SDValue();
3981 }
3982 
3983 MachineMemOperand::Flags
3984 SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
3985   // Because of how we convert atomic_load and atomic_store to normal loads and
3986   // stores in the DAG, we need to ensure that the MMOs are marked volatile
3987   // since DAGCombine hasn't been updated to account for atomic, but non
3988   // volatile loads.  (See D57601)
3989   if (auto *SI = dyn_cast<StoreInst>(&I))
3990     if (SI->isAtomic())
3991       return MachineMemOperand::MOVolatile;
3992   if (auto *LI = dyn_cast<LoadInst>(&I))
3993     if (LI->isAtomic())
3994       return MachineMemOperand::MOVolatile;
3995   if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
3996     if (AI->isAtomic())
3997       return MachineMemOperand::MOVolatile;
3998   if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
3999     if (AI->isAtomic())
4000       return MachineMemOperand::MOVolatile;
4001   return MachineMemOperand::MONone;
4002 }
4003 
4004 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4005                                               SelectionDAG &DAG) const {
4006   MachineFunction &MF = DAG.getMachineFunction();
4007   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
4008   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4009     report_fatal_error("Variable-sized stack allocations are not supported "
4010                        "in GHC calling convention");
4011   return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4012                             SystemZ::R15D, Op.getValueType());
4013 }
4014 
4015 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4016                                                  SelectionDAG &DAG) const {
4017   MachineFunction &MF = DAG.getMachineFunction();
4018   MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
4019   bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4020 
4021   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4022     report_fatal_error("Variable-sized stack allocations are not supported "
4023                        "in GHC calling convention");
4024 
4025   SDValue Chain = Op.getOperand(0);
4026   SDValue NewSP = Op.getOperand(1);
4027   SDValue Backchain;
4028   SDLoc DL(Op);
4029 
4030   if (StoreBackchain) {
4031     SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
4032     Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
4033   }
4034 
4035   Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
4036 
4037   if (StoreBackchain)
4038     Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
4039 
4040   return Chain;
4041 }
4042 
4043 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4044                                              SelectionDAG &DAG) const {
4045   bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4046   if (!IsData)
4047     // Just preserve the chain.
4048     return Op.getOperand(0);
4049 
4050   SDLoc DL(Op);
4051   bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4052   unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4053   auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4054   SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4055                    Op.getOperand(1)};
4056   return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4057                                  Node->getVTList(), Ops,
4058                                  Node->getMemoryVT(), Node->getMemOperand());
4059 }
4060 
4061 // Convert condition code in CCReg to an i32 value.
4062 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4063   SDLoc DL(CCReg);
4064   SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4065   return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4066                      DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4067 }
4068 
4069 SDValue
4070 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4071                                               SelectionDAG &DAG) const {
4072   unsigned Opcode, CCValid;
4073   if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4074     assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4075     SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4076     SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4077     DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4078     return SDValue();
4079   }
4080 
4081   return SDValue();
4082 }
4083 
4084 SDValue
4085 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4086                                                SelectionDAG &DAG) const {
4087   unsigned Opcode, CCValid;
4088   if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4089     SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4090     if (Op->getNumValues() == 1)
4091       return getCCResult(DAG, SDValue(Node, 0));
4092     assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4093     return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4094                        SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4095   }
4096 
4097   unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4098   switch (Id) {
4099   case Intrinsic::thread_pointer:
4100     return lowerThreadPointer(SDLoc(Op), DAG);
4101 
4102   case Intrinsic::s390_vpdi:
4103     return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4104                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4105 
4106   case Intrinsic::s390_vperm:
4107     return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4108                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4109 
4110   case Intrinsic::s390_vuphb:
4111   case Intrinsic::s390_vuphh:
4112   case Intrinsic::s390_vuphf:
4113     return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4114                        Op.getOperand(1));
4115 
4116   case Intrinsic::s390_vuplhb:
4117   case Intrinsic::s390_vuplhh:
4118   case Intrinsic::s390_vuplhf:
4119     return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4120                        Op.getOperand(1));
4121 
4122   case Intrinsic::s390_vuplb:
4123   case Intrinsic::s390_vuplhw:
4124   case Intrinsic::s390_vuplf:
4125     return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4126                        Op.getOperand(1));
4127 
4128   case Intrinsic::s390_vupllb:
4129   case Intrinsic::s390_vupllh:
4130   case Intrinsic::s390_vupllf:
4131     return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4132                        Op.getOperand(1));
4133 
4134   case Intrinsic::s390_vsumb:
4135   case Intrinsic::s390_vsumh:
4136   case Intrinsic::s390_vsumgh:
4137   case Intrinsic::s390_vsumgf:
4138   case Intrinsic::s390_vsumqf:
4139   case Intrinsic::s390_vsumqg:
4140     return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4141                        Op.getOperand(1), Op.getOperand(2));
4142   }
4143 
4144   return SDValue();
4145 }
4146 
4147 namespace {
4148 // Says that SystemZISD operation Opcode can be used to perform the equivalent
4149 // of a VPERM with permute vector Bytes.  If Opcode takes three operands,
4150 // Operand is the constant third operand, otherwise it is the number of
4151 // bytes in each element of the result.
4152 struct Permute {
4153   unsigned Opcode;
4154   unsigned Operand;
4155   unsigned char Bytes[SystemZ::VectorBytes];
4156 };
4157 }
4158 
4159 static const Permute PermuteForms[] = {
4160   // VMRHG
4161   { SystemZISD::MERGE_HIGH, 8,
4162     { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4163   // VMRHF
4164   { SystemZISD::MERGE_HIGH, 4,
4165     { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4166   // VMRHH
4167   { SystemZISD::MERGE_HIGH, 2,
4168     { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4169   // VMRHB
4170   { SystemZISD::MERGE_HIGH, 1,
4171     { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4172   // VMRLG
4173   { SystemZISD::MERGE_LOW, 8,
4174     { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4175   // VMRLF
4176   { SystemZISD::MERGE_LOW, 4,
4177     { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4178   // VMRLH
4179   { SystemZISD::MERGE_LOW, 2,
4180     { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4181   // VMRLB
4182   { SystemZISD::MERGE_LOW, 1,
4183     { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4184   // VPKG
4185   { SystemZISD::PACK, 4,
4186     { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4187   // VPKF
4188   { SystemZISD::PACK, 2,
4189     { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4190   // VPKH
4191   { SystemZISD::PACK, 1,
4192     { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4193   // VPDI V1, V2, 4  (low half of V1, high half of V2)
4194   { SystemZISD::PERMUTE_DWORDS, 4,
4195     { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4196   // VPDI V1, V2, 1  (high half of V1, low half of V2)
4197   { SystemZISD::PERMUTE_DWORDS, 1,
4198     { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4199 };
4200 
4201 // Called after matching a vector shuffle against a particular pattern.
4202 // Both the original shuffle and the pattern have two vector operands.
4203 // OpNos[0] is the operand of the original shuffle that should be used for
4204 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4205 // OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
4206 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4207 // for operands 0 and 1 of the pattern.
4208 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4209   if (OpNos[0] < 0) {
4210     if (OpNos[1] < 0)
4211       return false;
4212     OpNo0 = OpNo1 = OpNos[1];
4213   } else if (OpNos[1] < 0) {
4214     OpNo0 = OpNo1 = OpNos[0];
4215   } else {
4216     OpNo0 = OpNos[0];
4217     OpNo1 = OpNos[1];
4218   }
4219   return true;
4220 }
4221 
4222 // Bytes is a VPERM-like permute vector, except that -1 is used for
4223 // undefined bytes.  Return true if the VPERM can be implemented using P.
4224 // When returning true set OpNo0 to the VPERM operand that should be
4225 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4226 //
4227 // For example, if swapping the VPERM operands allows P to match, OpNo0
4228 // will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
4229 // operand, but rewriting it to use two duplicated operands allows it to
4230 // match P, then OpNo0 and OpNo1 will be the same.
4231 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4232                          unsigned &OpNo0, unsigned &OpNo1) {
4233   int OpNos[] = { -1, -1 };
4234   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4235     int Elt = Bytes[I];
4236     if (Elt >= 0) {
4237       // Make sure that the two permute vectors use the same suboperand
4238       // byte number.  Only the operand numbers (the high bits) are
4239       // allowed to differ.
4240       if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4241         return false;
4242       int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4243       int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4244       // Make sure that the operand mappings are consistent with previous
4245       // elements.
4246       if (OpNos[ModelOpNo] == 1 - RealOpNo)
4247         return false;
4248       OpNos[ModelOpNo] = RealOpNo;
4249     }
4250   }
4251   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4252 }
4253 
4254 // As above, but search for a matching permute.
4255 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4256                                    unsigned &OpNo0, unsigned &OpNo1) {
4257   for (auto &P : PermuteForms)
4258     if (matchPermute(Bytes, P, OpNo0, OpNo1))
4259       return &P;
4260   return nullptr;
4261 }
4262 
4263 // Bytes is a VPERM-like permute vector, except that -1 is used for
4264 // undefined bytes.  This permute is an operand of an outer permute.
4265 // See whether redistributing the -1 bytes gives a shuffle that can be
4266 // implemented using P.  If so, set Transform to a VPERM-like permute vector
4267 // that, when applied to the result of P, gives the original permute in Bytes.
4268 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4269                                const Permute &P,
4270                                SmallVectorImpl<int> &Transform) {
4271   unsigned To = 0;
4272   for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4273     int Elt = Bytes[From];
4274     if (Elt < 0)
4275       // Byte number From of the result is undefined.
4276       Transform[From] = -1;
4277     else {
4278       while (P.Bytes[To] != Elt) {
4279         To += 1;
4280         if (To == SystemZ::VectorBytes)
4281           return false;
4282       }
4283       Transform[From] = To;
4284     }
4285   }
4286   return true;
4287 }
4288 
4289 // As above, but search for a matching permute.
4290 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4291                                          SmallVectorImpl<int> &Transform) {
4292   for (auto &P : PermuteForms)
4293     if (matchDoublePermute(Bytes, P, Transform))
4294       return &P;
4295   return nullptr;
4296 }
4297 
4298 // Convert the mask of the given shuffle op into a byte-level mask,
4299 // as if it had type vNi8.
4300 static bool getVPermMask(SDValue ShuffleOp,
4301                          SmallVectorImpl<int> &Bytes) {
4302   EVT VT = ShuffleOp.getValueType();
4303   unsigned NumElements = VT.getVectorNumElements();
4304   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4305 
4306   if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4307     Bytes.resize(NumElements * BytesPerElement, -1);
4308     for (unsigned I = 0; I < NumElements; ++I) {
4309       int Index = VSN->getMaskElt(I);
4310       if (Index >= 0)
4311         for (unsigned J = 0; J < BytesPerElement; ++J)
4312           Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4313     }
4314     return true;
4315   }
4316   if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4317       isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4318     unsigned Index = ShuffleOp.getConstantOperandVal(1);
4319     Bytes.resize(NumElements * BytesPerElement, -1);
4320     for (unsigned I = 0; I < NumElements; ++I)
4321       for (unsigned J = 0; J < BytesPerElement; ++J)
4322         Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4323     return true;
4324   }
4325   return false;
4326 }
4327 
4328 // Bytes is a VPERM-like permute vector, except that -1 is used for
4329 // undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
4330 // the result come from a contiguous sequence of bytes from one input.
4331 // Set Base to the selector for the first byte if so.
4332 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4333                             unsigned BytesPerElement, int &Base) {
4334   Base = -1;
4335   for (unsigned I = 0; I < BytesPerElement; ++I) {
4336     if (Bytes[Start + I] >= 0) {
4337       unsigned Elem = Bytes[Start + I];
4338       if (Base < 0) {
4339         Base = Elem - I;
4340         // Make sure the bytes would come from one input operand.
4341         if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4342           return false;
4343       } else if (unsigned(Base) != Elem - I)
4344         return false;
4345     }
4346   }
4347   return true;
4348 }
4349 
4350 // Bytes is a VPERM-like permute vector, except that -1 is used for
4351 // undefined bytes.  Return true if it can be performed using VSLDI.
4352 // When returning true, set StartIndex to the shift amount and OpNo0
4353 // and OpNo1 to the VPERM operands that should be used as the first
4354 // and second shift operand respectively.
4355 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4356                                unsigned &StartIndex, unsigned &OpNo0,
4357                                unsigned &OpNo1) {
4358   int OpNos[] = { -1, -1 };
4359   int Shift = -1;
4360   for (unsigned I = 0; I < 16; ++I) {
4361     int Index = Bytes[I];
4362     if (Index >= 0) {
4363       int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4364       int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4365       int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4366       if (Shift < 0)
4367         Shift = ExpectedShift;
4368       else if (Shift != ExpectedShift)
4369         return false;
4370       // Make sure that the operand mappings are consistent with previous
4371       // elements.
4372       if (OpNos[ModelOpNo] == 1 - RealOpNo)
4373         return false;
4374       OpNos[ModelOpNo] = RealOpNo;
4375     }
4376   }
4377   StartIndex = Shift;
4378   return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4379 }
4380 
4381 // Create a node that performs P on operands Op0 and Op1, casting the
4382 // operands to the appropriate type.  The type of the result is determined by P.
4383 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4384                               const Permute &P, SDValue Op0, SDValue Op1) {
4385   // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
4386   // elements of a PACK are twice as wide as the outputs.
4387   unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4388                       P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4389                       P.Operand);
4390   // Cast both operands to the appropriate type.
4391   MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4392                               SystemZ::VectorBytes / InBytes);
4393   Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4394   Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4395   SDValue Op;
4396   if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4397     SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4398     Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4399   } else if (P.Opcode == SystemZISD::PACK) {
4400     MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4401                                  SystemZ::VectorBytes / P.Operand);
4402     Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4403   } else {
4404     Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4405   }
4406   return Op;
4407 }
4408 
4409 // Bytes is a VPERM-like permute vector, except that -1 is used for
4410 // undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
4411 // VSLDI or VPERM.
4412 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4413                                      SDValue *Ops,
4414                                      const SmallVectorImpl<int> &Bytes) {
4415   for (unsigned I = 0; I < 2; ++I)
4416     Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4417 
4418   // First see whether VSLDI can be used.
4419   unsigned StartIndex, OpNo0, OpNo1;
4420   if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4421     return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4422                        Ops[OpNo1],
4423                        DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4424 
4425   // Fall back on VPERM.  Construct an SDNode for the permute vector.
4426   SDValue IndexNodes[SystemZ::VectorBytes];
4427   for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4428     if (Bytes[I] >= 0)
4429       IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4430     else
4431       IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4432   SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4433   return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
4434 }
4435 
4436 namespace {
4437 // Describes a general N-operand vector shuffle.
4438 struct GeneralShuffle {
4439   GeneralShuffle(EVT vt) : VT(vt) {}
4440   void addUndef();
4441   bool add(SDValue, unsigned);
4442   SDValue getNode(SelectionDAG &, const SDLoc &);
4443 
4444   // The operands of the shuffle.
4445   SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4446 
4447   // Index I is -1 if byte I of the result is undefined.  Otherwise the
4448   // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4449   // Bytes[I] / SystemZ::VectorBytes.
4450   SmallVector<int, SystemZ::VectorBytes> Bytes;
4451 
4452   // The type of the shuffle result.
4453   EVT VT;
4454 };
4455 }
4456 
4457 // Add an extra undefined element to the shuffle.
4458 void GeneralShuffle::addUndef() {
4459   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4460   for (unsigned I = 0; I < BytesPerElement; ++I)
4461     Bytes.push_back(-1);
4462 }
4463 
4464 // Add an extra element to the shuffle, taking it from element Elem of Op.
4465 // A null Op indicates a vector input whose value will be calculated later;
4466 // there is at most one such input per shuffle and it always has the same
4467 // type as the result. Aborts and returns false if the source vector elements
4468 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4469 // LLVM they become implicitly extended, but this is rare and not optimized.
4470 bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4471   unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4472 
4473   // The source vector can have wider elements than the result,
4474   // either through an explicit TRUNCATE or because of type legalization.
4475   // We want the least significant part.
4476   EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4477   unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4478 
4479   // Return false if the source elements are smaller than their destination
4480   // elements.
4481   if (FromBytesPerElement < BytesPerElement)
4482     return false;
4483 
4484   unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4485                    (FromBytesPerElement - BytesPerElement));
4486 
4487   // Look through things like shuffles and bitcasts.
4488   while (Op.getNode()) {
4489     if (Op.getOpcode() == ISD::BITCAST)
4490       Op = Op.getOperand(0);
4491     else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4492       // See whether the bytes we need come from a contiguous part of one
4493       // operand.
4494       SmallVector<int, SystemZ::VectorBytes> OpBytes;
4495       if (!getVPermMask(Op, OpBytes))
4496         break;
4497       int NewByte;
4498       if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4499         break;
4500       if (NewByte < 0) {
4501         addUndef();
4502         return true;
4503       }
4504       Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4505       Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4506     } else if (Op.isUndef()) {
4507       addUndef();
4508       return true;
4509     } else
4510       break;
4511   }
4512 
4513   // Make sure that the source of the extraction is in Ops.
4514   unsigned OpNo = 0;
4515   for (; OpNo < Ops.size(); ++OpNo)
4516     if (Ops[OpNo] == Op)
4517       break;
4518   if (OpNo == Ops.size())
4519     Ops.push_back(Op);
4520 
4521   // Add the element to Bytes.
4522   unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4523   for (unsigned I = 0; I < BytesPerElement; ++I)
4524     Bytes.push_back(Base + I);
4525 
4526   return true;
4527 }
4528 
4529 // Return SDNodes for the completed shuffle.
4530 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4531   assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4532 
4533   if (Ops.size() == 0)
4534     return DAG.getUNDEF(VT);
4535 
4536   // Make sure that there are at least two shuffle operands.
4537   if (Ops.size() == 1)
4538     Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4539 
4540   // Create a tree of shuffles, deferring root node until after the loop.
4541   // Try to redistribute the undefined elements of non-root nodes so that
4542   // the non-root shuffles match something like a pack or merge, then adjust
4543   // the parent node's permute vector to compensate for the new order.
4544   // Among other things, this copes with vectors like <2 x i16> that were
4545   // padded with undefined elements during type legalization.
4546   //
4547   // In the best case this redistribution will lead to the whole tree
4548   // using packs and merges.  It should rarely be a loss in other cases.
4549   unsigned Stride = 1;
4550   for (; Stride * 2 < Ops.size(); Stride *= 2) {
4551     for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4552       SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4553 
4554       // Create a mask for just these two operands.
4555       SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4556       for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4557         unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4558         unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4559         if (OpNo == I)
4560           NewBytes[J] = Byte;
4561         else if (OpNo == I + Stride)
4562           NewBytes[J] = SystemZ::VectorBytes + Byte;
4563         else
4564           NewBytes[J] = -1;
4565       }
4566       // See if it would be better to reorganize NewMask to avoid using VPERM.
4567       SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4568       if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4569         Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4570         // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4571         for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4572           if (NewBytes[J] >= 0) {
4573             assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4574                    "Invalid double permute");
4575             Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4576           } else
4577             assert(NewBytesMap[J] < 0 && "Invalid double permute");
4578         }
4579       } else {
4580         // Just use NewBytes on the operands.
4581         Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4582         for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4583           if (NewBytes[J] >= 0)
4584             Bytes[J] = I * SystemZ::VectorBytes + J;
4585       }
4586     }
4587   }
4588 
4589   // Now we just have 2 inputs.  Put the second operand in Ops[1].
4590   if (Stride > 1) {
4591     Ops[1] = Ops[Stride];
4592     for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4593       if (Bytes[I] >= int(SystemZ::VectorBytes))
4594         Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4595   }
4596 
4597   // Look for an instruction that can do the permute without resorting
4598   // to VPERM.
4599   unsigned OpNo0, OpNo1;
4600   SDValue Op;
4601   if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4602     Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4603   else
4604     Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4605   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4606 }
4607 
4608 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4609 static bool isScalarToVector(SDValue Op) {
4610   for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4611     if (!Op.getOperand(I).isUndef())
4612       return false;
4613   return true;
4614 }
4615 
4616 // Return a vector of type VT that contains Value in the first element.
4617 // The other elements don't matter.
4618 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4619                                    SDValue Value) {
4620   // If we have a constant, replicate it to all elements and let the
4621   // BUILD_VECTOR lowering take care of it.
4622   if (Value.getOpcode() == ISD::Constant ||
4623       Value.getOpcode() == ISD::ConstantFP) {
4624     SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4625     return DAG.getBuildVector(VT, DL, Ops);
4626   }
4627   if (Value.isUndef())
4628     return DAG.getUNDEF(VT);
4629   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4630 }
4631 
4632 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4633 // element 1.  Used for cases in which replication is cheap.
4634 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4635                                  SDValue Op0, SDValue Op1) {
4636   if (Op0.isUndef()) {
4637     if (Op1.isUndef())
4638       return DAG.getUNDEF(VT);
4639     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4640   }
4641   if (Op1.isUndef())
4642     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4643   return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4644                      buildScalarToVector(DAG, DL, VT, Op0),
4645                      buildScalarToVector(DAG, DL, VT, Op1));
4646 }
4647 
4648 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4649 // vector for them.
4650 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4651                           SDValue Op1) {
4652   if (Op0.isUndef() && Op1.isUndef())
4653     return DAG.getUNDEF(MVT::v2i64);
4654   // If one of the two inputs is undefined then replicate the other one,
4655   // in order to avoid using another register unnecessarily.
4656   if (Op0.isUndef())
4657     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4658   else if (Op1.isUndef())
4659     Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4660   else {
4661     Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4662     Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4663   }
4664   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4665 }
4666 
4667 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4668 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4669 // the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
4670 // would benefit from this representation and return it if so.
4671 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4672                                      BuildVectorSDNode *BVN) {
4673   EVT VT = BVN->getValueType(0);
4674   unsigned NumElements = VT.getVectorNumElements();
4675 
4676   // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4677   // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
4678   // need a BUILD_VECTOR, add an additional placeholder operand for that
4679   // BUILD_VECTOR and store its operands in ResidueOps.
4680   GeneralShuffle GS(VT);
4681   SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
4682   bool FoundOne = false;
4683   for (unsigned I = 0; I < NumElements; ++I) {
4684     SDValue Op = BVN->getOperand(I);
4685     if (Op.getOpcode() == ISD::TRUNCATE)
4686       Op = Op.getOperand(0);
4687     if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4688         Op.getOperand(1).getOpcode() == ISD::Constant) {
4689       unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
4690       if (!GS.add(Op.getOperand(0), Elem))
4691         return SDValue();
4692       FoundOne = true;
4693     } else if (Op.isUndef()) {
4694       GS.addUndef();
4695     } else {
4696       if (!GS.add(SDValue(), ResidueOps.size()))
4697         return SDValue();
4698       ResidueOps.push_back(BVN->getOperand(I));
4699     }
4700   }
4701 
4702   // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
4703   if (!FoundOne)
4704     return SDValue();
4705 
4706   // Create the BUILD_VECTOR for the remaining elements, if any.
4707   if (!ResidueOps.empty()) {
4708     while (ResidueOps.size() < NumElements)
4709       ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
4710     for (auto &Op : GS.Ops) {
4711       if (!Op.getNode()) {
4712         Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
4713         break;
4714       }
4715     }
4716   }
4717   return GS.getNode(DAG, SDLoc(BVN));
4718 }
4719 
4720 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
4721   if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
4722     return true;
4723   if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
4724     return true;
4725   return false;
4726 }
4727 
4728 // Combine GPR scalar values Elems into a vector of type VT.
4729 SDValue
4730 SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4731                                    SmallVectorImpl<SDValue> &Elems) const {
4732   // See whether there is a single replicated value.
4733   SDValue Single;
4734   unsigned int NumElements = Elems.size();
4735   unsigned int Count = 0;
4736   for (auto Elem : Elems) {
4737     if (!Elem.isUndef()) {
4738       if (!Single.getNode())
4739         Single = Elem;
4740       else if (Elem != Single) {
4741         Single = SDValue();
4742         break;
4743       }
4744       Count += 1;
4745     }
4746   }
4747   // There are three cases here:
4748   //
4749   // - if the only defined element is a loaded one, the best sequence
4750   //   is a replicating load.
4751   //
4752   // - otherwise, if the only defined element is an i64 value, we will
4753   //   end up with the same VLVGP sequence regardless of whether we short-cut
4754   //   for replication or fall through to the later code.
4755   //
4756   // - otherwise, if the only defined element is an i32 or smaller value,
4757   //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
4758   //   This is only a win if the single defined element is used more than once.
4759   //   In other cases we're better off using a single VLVGx.
4760   if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
4761     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
4762 
4763   // If all elements are loads, use VLREP/VLEs (below).
4764   bool AllLoads = true;
4765   for (auto Elem : Elems)
4766     if (!isVectorElementLoad(Elem)) {
4767       AllLoads = false;
4768       break;
4769     }
4770 
4771   // The best way of building a v2i64 from two i64s is to use VLVGP.
4772   if (VT == MVT::v2i64 && !AllLoads)
4773     return joinDwords(DAG, DL, Elems[0], Elems[1]);
4774 
4775   // Use a 64-bit merge high to combine two doubles.
4776   if (VT == MVT::v2f64 && !AllLoads)
4777     return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4778 
4779   // Build v4f32 values directly from the FPRs:
4780   //
4781   //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
4782   //         V              V         VMRHF
4783   //      <ABxx>         <CDxx>
4784   //                V                 VMRHG
4785   //              <ABCD>
4786   if (VT == MVT::v4f32 && !AllLoads) {
4787     SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
4788     SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
4789     // Avoid unnecessary undefs by reusing the other operand.
4790     if (Op01.isUndef())
4791       Op01 = Op23;
4792     else if (Op23.isUndef())
4793       Op23 = Op01;
4794     // Merging identical replications is a no-op.
4795     if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
4796       return Op01;
4797     Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
4798     Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
4799     SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
4800                              DL, MVT::v2i64, Op01, Op23);
4801     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4802   }
4803 
4804   // Collect the constant terms.
4805   SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
4806   SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
4807 
4808   unsigned NumConstants = 0;
4809   for (unsigned I = 0; I < NumElements; ++I) {
4810     SDValue Elem = Elems[I];
4811     if (Elem.getOpcode() == ISD::Constant ||
4812         Elem.getOpcode() == ISD::ConstantFP) {
4813       NumConstants += 1;
4814       Constants[I] = Elem;
4815       Done[I] = true;
4816     }
4817   }
4818   // If there was at least one constant, fill in the other elements of
4819   // Constants with undefs to get a full vector constant and use that
4820   // as the starting point.
4821   SDValue Result;
4822   SDValue ReplicatedVal;
4823   if (NumConstants > 0) {
4824     for (unsigned I = 0; I < NumElements; ++I)
4825       if (!Constants[I].getNode())
4826         Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
4827     Result = DAG.getBuildVector(VT, DL, Constants);
4828   } else {
4829     // Otherwise try to use VLREP or VLVGP to start the sequence in order to
4830     // avoid a false dependency on any previous contents of the vector
4831     // register.
4832 
4833     // Use a VLREP if at least one element is a load. Make sure to replicate
4834     // the load with the most elements having its value.
4835     std::map<const SDNode*, unsigned> UseCounts;
4836     SDNode *LoadMaxUses = nullptr;
4837     for (unsigned I = 0; I < NumElements; ++I)
4838       if (isVectorElementLoad(Elems[I])) {
4839         SDNode *Ld = Elems[I].getNode();
4840         UseCounts[Ld]++;
4841         if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
4842           LoadMaxUses = Ld;
4843       }
4844     if (LoadMaxUses != nullptr) {
4845       ReplicatedVal = SDValue(LoadMaxUses, 0);
4846       Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
4847     } else {
4848       // Try to use VLVGP.
4849       unsigned I1 = NumElements / 2 - 1;
4850       unsigned I2 = NumElements - 1;
4851       bool Def1 = !Elems[I1].isUndef();
4852       bool Def2 = !Elems[I2].isUndef();
4853       if (Def1 || Def2) {
4854         SDValue Elem1 = Elems[Def1 ? I1 : I2];
4855         SDValue Elem2 = Elems[Def2 ? I2 : I1];
4856         Result = DAG.getNode(ISD::BITCAST, DL, VT,
4857                              joinDwords(DAG, DL, Elem1, Elem2));
4858         Done[I1] = true;
4859         Done[I2] = true;
4860       } else
4861         Result = DAG.getUNDEF(VT);
4862     }
4863   }
4864 
4865   // Use VLVGx to insert the other elements.
4866   for (unsigned I = 0; I < NumElements; ++I)
4867     if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
4868       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
4869                            DAG.getConstant(I, DL, MVT::i32));
4870   return Result;
4871 }
4872 
4873 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4874                                                  SelectionDAG &DAG) const {
4875   auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4876   SDLoc DL(Op);
4877   EVT VT = Op.getValueType();
4878 
4879   if (BVN->isConstant()) {
4880     if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
4881       return Op;
4882 
4883     // Fall back to loading it from memory.
4884     return SDValue();
4885   }
4886 
4887   // See if we should use shuffles to construct the vector from other vectors.
4888   if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
4889     return Res;
4890 
4891   // Detect SCALAR_TO_VECTOR conversions.
4892   if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4893     return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4894 
4895   // Otherwise use buildVector to build the vector up from GPRs.
4896   unsigned NumElements = Op.getNumOperands();
4897   SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4898   for (unsigned I = 0; I < NumElements; ++I)
4899     Ops[I] = Op.getOperand(I);
4900   return buildVector(DAG, DL, VT, Ops);
4901 }
4902 
4903 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4904                                                    SelectionDAG &DAG) const {
4905   auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4906   SDLoc DL(Op);
4907   EVT VT = Op.getValueType();
4908   unsigned NumElements = VT.getVectorNumElements();
4909 
4910   if (VSN->isSplat()) {
4911     SDValue Op0 = Op.getOperand(0);
4912     unsigned Index = VSN->getSplatIndex();
4913     assert(Index < VT.getVectorNumElements() &&
4914            "Splat index should be defined and in first operand");
4915     // See whether the value we're splatting is directly available as a scalar.
4916     if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4917         Op0.getOpcode() == ISD::BUILD_VECTOR)
4918       return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4919     // Otherwise keep it as a vector-to-vector operation.
4920     return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4921                        DAG.getTargetConstant(Index, DL, MVT::i32));
4922   }
4923 
4924   GeneralShuffle GS(VT);
4925   for (unsigned I = 0; I < NumElements; ++I) {
4926     int Elt = VSN->getMaskElt(I);
4927     if (Elt < 0)
4928       GS.addUndef();
4929     else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
4930                      unsigned(Elt) % NumElements))
4931       return SDValue();
4932   }
4933   return GS.getNode(DAG, SDLoc(VSN));
4934 }
4935 
4936 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
4937                                                      SelectionDAG &DAG) const {
4938   SDLoc DL(Op);
4939   // Just insert the scalar into element 0 of an undefined vector.
4940   return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4941                      Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
4942                      Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
4943 }
4944 
4945 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4946                                                       SelectionDAG &DAG) const {
4947   // Handle insertions of floating-point values.
4948   SDLoc DL(Op);
4949   SDValue Op0 = Op.getOperand(0);
4950   SDValue Op1 = Op.getOperand(1);
4951   SDValue Op2 = Op.getOperand(2);
4952   EVT VT = Op.getValueType();
4953 
4954   // Insertions into constant indices of a v2f64 can be done using VPDI.
4955   // However, if the inserted value is a bitcast or a constant then it's
4956   // better to use GPRs, as below.
4957   if (VT == MVT::v2f64 &&
4958       Op1.getOpcode() != ISD::BITCAST &&
4959       Op1.getOpcode() != ISD::ConstantFP &&
4960       Op2.getOpcode() == ISD::Constant) {
4961     uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
4962     unsigned Mask = VT.getVectorNumElements() - 1;
4963     if (Index <= Mask)
4964       return Op;
4965   }
4966 
4967   // Otherwise bitcast to the equivalent integer form and insert via a GPR.
4968   MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
4969   MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
4970   SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
4971                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
4972                             DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
4973   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4974 }
4975 
4976 SDValue
4977 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4978                                                SelectionDAG &DAG) const {
4979   // Handle extractions of floating-point values.
4980   SDLoc DL(Op);
4981   SDValue Op0 = Op.getOperand(0);
4982   SDValue Op1 = Op.getOperand(1);
4983   EVT VT = Op.getValueType();
4984   EVT VecVT = Op0.getValueType();
4985 
4986   // Extractions of constant indices can be done directly.
4987   if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
4988     uint64_t Index = CIndexN->getZExtValue();
4989     unsigned Mask = VecVT.getVectorNumElements() - 1;
4990     if (Index <= Mask)
4991       return Op;
4992   }
4993 
4994   // Otherwise bitcast to the equivalent integer form and extract via a GPR.
4995   MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
4996   MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
4997   SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
4998                             DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
4999   return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5000 }
5001 
5002 SDValue
5003 SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
5004                                               unsigned UnpackHigh) const {
5005   SDValue PackedOp = Op.getOperand(0);
5006   EVT OutVT = Op.getValueType();
5007   EVT InVT = PackedOp.getValueType();
5008   unsigned ToBits = OutVT.getScalarSizeInBits();
5009   unsigned FromBits = InVT.getScalarSizeInBits();
5010   do {
5011     FromBits *= 2;
5012     EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5013                                  SystemZ::VectorBits / FromBits);
5014     PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
5015   } while (FromBits != ToBits);
5016   return PackedOp;
5017 }
5018 
5019 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5020                                           unsigned ByScalar) const {
5021   // Look for cases where a vector shift can use the *_BY_SCALAR form.
5022   SDValue Op0 = Op.getOperand(0);
5023   SDValue Op1 = Op.getOperand(1);
5024   SDLoc DL(Op);
5025   EVT VT = Op.getValueType();
5026   unsigned ElemBitSize = VT.getScalarSizeInBits();
5027 
5028   // See whether the shift vector is a splat represented as BUILD_VECTOR.
5029   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5030     APInt SplatBits, SplatUndef;
5031     unsigned SplatBitSize;
5032     bool HasAnyUndefs;
5033     // Check for constant splats.  Use ElemBitSize as the minimum element
5034     // width and reject splats that need wider elements.
5035     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5036                              ElemBitSize, true) &&
5037         SplatBitSize == ElemBitSize) {
5038       SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5039                                       DL, MVT::i32);
5040       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5041     }
5042     // Check for variable splats.
5043     BitVector UndefElements;
5044     SDValue Splat = BVN->getSplatValue(&UndefElements);
5045     if (Splat) {
5046       // Since i32 is the smallest legal type, we either need a no-op
5047       // or a truncation.
5048       SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5049       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5050     }
5051   }
5052 
5053   // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5054   // and the shift amount is directly available in a GPR.
5055   if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5056     if (VSN->isSplat()) {
5057       SDValue VSNOp0 = VSN->getOperand(0);
5058       unsigned Index = VSN->getSplatIndex();
5059       assert(Index < VT.getVectorNumElements() &&
5060              "Splat index should be defined and in first operand");
5061       if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5062           VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5063         // Since i32 is the smallest legal type, we either need a no-op
5064         // or a truncation.
5065         SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5066                                     VSNOp0.getOperand(Index));
5067         return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5068       }
5069     }
5070   }
5071 
5072   // Otherwise just treat the current form as legal.
5073   return Op;
5074 }
5075 
5076 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5077                                               SelectionDAG &DAG) const {
5078   switch (Op.getOpcode()) {
5079   case ISD::FRAMEADDR:
5080     return lowerFRAMEADDR(Op, DAG);
5081   case ISD::RETURNADDR:
5082     return lowerRETURNADDR(Op, DAG);
5083   case ISD::BR_CC:
5084     return lowerBR_CC(Op, DAG);
5085   case ISD::SELECT_CC:
5086     return lowerSELECT_CC(Op, DAG);
5087   case ISD::SETCC:
5088     return lowerSETCC(Op, DAG);
5089   case ISD::STRICT_FSETCC:
5090     return lowerSTRICT_FSETCC(Op, DAG, false);
5091   case ISD::STRICT_FSETCCS:
5092     return lowerSTRICT_FSETCC(Op, DAG, true);
5093   case ISD::GlobalAddress:
5094     return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5095   case ISD::GlobalTLSAddress:
5096     return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5097   case ISD::BlockAddress:
5098     return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5099   case ISD::JumpTable:
5100     return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5101   case ISD::ConstantPool:
5102     return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5103   case ISD::BITCAST:
5104     return lowerBITCAST(Op, DAG);
5105   case ISD::VASTART:
5106     return lowerVASTART(Op, DAG);
5107   case ISD::VACOPY:
5108     return lowerVACOPY(Op, DAG);
5109   case ISD::DYNAMIC_STACKALLOC:
5110     return lowerDYNAMIC_STACKALLOC(Op, DAG);
5111   case ISD::GET_DYNAMIC_AREA_OFFSET:
5112     return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5113   case ISD::SMUL_LOHI:
5114     return lowerSMUL_LOHI(Op, DAG);
5115   case ISD::UMUL_LOHI:
5116     return lowerUMUL_LOHI(Op, DAG);
5117   case ISD::SDIVREM:
5118     return lowerSDIVREM(Op, DAG);
5119   case ISD::UDIVREM:
5120     return lowerUDIVREM(Op, DAG);
5121   case ISD::SADDO:
5122   case ISD::SSUBO:
5123   case ISD::UADDO:
5124   case ISD::USUBO:
5125     return lowerXALUO(Op, DAG);
5126   case ISD::ADDCARRY:
5127   case ISD::SUBCARRY:
5128     return lowerADDSUBCARRY(Op, DAG);
5129   case ISD::OR:
5130     return lowerOR(Op, DAG);
5131   case ISD::CTPOP:
5132     return lowerCTPOP(Op, DAG);
5133   case ISD::ATOMIC_FENCE:
5134     return lowerATOMIC_FENCE(Op, DAG);
5135   case ISD::ATOMIC_SWAP:
5136     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5137   case ISD::ATOMIC_STORE:
5138     return lowerATOMIC_STORE(Op, DAG);
5139   case ISD::ATOMIC_LOAD:
5140     return lowerATOMIC_LOAD(Op, DAG);
5141   case ISD::ATOMIC_LOAD_ADD:
5142     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5143   case ISD::ATOMIC_LOAD_SUB:
5144     return lowerATOMIC_LOAD_SUB(Op, DAG);
5145   case ISD::ATOMIC_LOAD_AND:
5146     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5147   case ISD::ATOMIC_LOAD_OR:
5148     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5149   case ISD::ATOMIC_LOAD_XOR:
5150     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5151   case ISD::ATOMIC_LOAD_NAND:
5152     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5153   case ISD::ATOMIC_LOAD_MIN:
5154     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5155   case ISD::ATOMIC_LOAD_MAX:
5156     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5157   case ISD::ATOMIC_LOAD_UMIN:
5158     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5159   case ISD::ATOMIC_LOAD_UMAX:
5160     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5161   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5162     return lowerATOMIC_CMP_SWAP(Op, DAG);
5163   case ISD::STACKSAVE:
5164     return lowerSTACKSAVE(Op, DAG);
5165   case ISD::STACKRESTORE:
5166     return lowerSTACKRESTORE(Op, DAG);
5167   case ISD::PREFETCH:
5168     return lowerPREFETCH(Op, DAG);
5169   case ISD::INTRINSIC_W_CHAIN:
5170     return lowerINTRINSIC_W_CHAIN(Op, DAG);
5171   case ISD::INTRINSIC_WO_CHAIN:
5172     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5173   case ISD::BUILD_VECTOR:
5174     return lowerBUILD_VECTOR(Op, DAG);
5175   case ISD::VECTOR_SHUFFLE:
5176     return lowerVECTOR_SHUFFLE(Op, DAG);
5177   case ISD::SCALAR_TO_VECTOR:
5178     return lowerSCALAR_TO_VECTOR(Op, DAG);
5179   case ISD::INSERT_VECTOR_ELT:
5180     return lowerINSERT_VECTOR_ELT(Op, DAG);
5181   case ISD::EXTRACT_VECTOR_ELT:
5182     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5183   case ISD::SIGN_EXTEND_VECTOR_INREG:
5184     return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
5185   case ISD::ZERO_EXTEND_VECTOR_INREG:
5186     return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
5187   case ISD::SHL:
5188     return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5189   case ISD::SRL:
5190     return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5191   case ISD::SRA:
5192     return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5193   default:
5194     llvm_unreachable("Unexpected node to lower");
5195   }
5196 }
5197 
5198 // Lower operations with invalid operand or result types (currently used
5199 // only for 128-bit integer types).
5200 
5201 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
5202   SDLoc DL(In);
5203   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
5204                            DAG.getIntPtrConstant(0, DL));
5205   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
5206                            DAG.getIntPtrConstant(1, DL));
5207   SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
5208                                     MVT::Untyped, Hi, Lo);
5209   return SDValue(Pair, 0);
5210 }
5211 
5212 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
5213   SDLoc DL(In);
5214   SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5215                                           DL, MVT::i64, In);
5216   SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5217                                           DL, MVT::i64, In);
5218   return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
5219 }
5220 
5221 void
5222 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5223                                              SmallVectorImpl<SDValue> &Results,
5224                                              SelectionDAG &DAG) const {
5225   switch (N->getOpcode()) {
5226   case ISD::ATOMIC_LOAD: {
5227     SDLoc DL(N);
5228     SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5229     SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5230     MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5231     SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5232                                           DL, Tys, Ops, MVT::i128, MMO);
5233     Results.push_back(lowerGR128ToI128(DAG, Res));
5234     Results.push_back(Res.getValue(1));
5235     break;
5236   }
5237   case ISD::ATOMIC_STORE: {
5238     SDLoc DL(N);
5239     SDVTList Tys = DAG.getVTList(MVT::Other);
5240     SDValue Ops[] = { N->getOperand(0),
5241                       lowerI128ToGR128(DAG, N->getOperand(2)),
5242                       N->getOperand(1) };
5243     MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5244     SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5245                                           DL, Tys, Ops, MVT::i128, MMO);
5246     // We have to enforce sequential consistency by performing a
5247     // serialization operation after the store.
5248     if (cast<AtomicSDNode>(N)->getOrdering() ==
5249         AtomicOrdering::SequentiallyConsistent)
5250       Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5251                                        MVT::Other, Res), 0);
5252     Results.push_back(Res);
5253     break;
5254   }
5255   case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5256     SDLoc DL(N);
5257     SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5258     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5259                       lowerI128ToGR128(DAG, N->getOperand(2)),
5260                       lowerI128ToGR128(DAG, N->getOperand(3)) };
5261     MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5262     SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5263                                           DL, Tys, Ops, MVT::i128, MMO);
5264     SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5265                                 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5266     Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5267     Results.push_back(lowerGR128ToI128(DAG, Res));
5268     Results.push_back(Success);
5269     Results.push_back(Res.getValue(2));
5270     break;
5271   }
5272   default:
5273     llvm_unreachable("Unexpected node to lower");
5274   }
5275 }
5276 
5277 void
5278 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5279                                           SmallVectorImpl<SDValue> &Results,
5280                                           SelectionDAG &DAG) const {
5281   return LowerOperationWrapper(N, Results, DAG);
5282 }
5283 
5284 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5285 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5286   switch ((SystemZISD::NodeType)Opcode) {
5287     case SystemZISD::FIRST_NUMBER: break;
5288     OPCODE(RET_FLAG);
5289     OPCODE(CALL);
5290     OPCODE(SIBCALL);
5291     OPCODE(TLS_GDCALL);
5292     OPCODE(TLS_LDCALL);
5293     OPCODE(PCREL_WRAPPER);
5294     OPCODE(PCREL_OFFSET);
5295     OPCODE(IABS);
5296     OPCODE(ICMP);
5297     OPCODE(FCMP);
5298     OPCODE(STRICT_FCMP);
5299     OPCODE(STRICT_FCMPS);
5300     OPCODE(TM);
5301     OPCODE(BR_CCMASK);
5302     OPCODE(SELECT_CCMASK);
5303     OPCODE(ADJDYNALLOC);
5304     OPCODE(POPCNT);
5305     OPCODE(SMUL_LOHI);
5306     OPCODE(UMUL_LOHI);
5307     OPCODE(SDIVREM);
5308     OPCODE(UDIVREM);
5309     OPCODE(SADDO);
5310     OPCODE(SSUBO);
5311     OPCODE(UADDO);
5312     OPCODE(USUBO);
5313     OPCODE(ADDCARRY);
5314     OPCODE(SUBCARRY);
5315     OPCODE(GET_CCMASK);
5316     OPCODE(MVC);
5317     OPCODE(MVC_LOOP);
5318     OPCODE(NC);
5319     OPCODE(NC_LOOP);
5320     OPCODE(OC);
5321     OPCODE(OC_LOOP);
5322     OPCODE(XC);
5323     OPCODE(XC_LOOP);
5324     OPCODE(CLC);
5325     OPCODE(CLC_LOOP);
5326     OPCODE(STPCPY);
5327     OPCODE(STRCMP);
5328     OPCODE(SEARCH_STRING);
5329     OPCODE(IPM);
5330     OPCODE(MEMBARRIER);
5331     OPCODE(TBEGIN);
5332     OPCODE(TBEGIN_NOFLOAT);
5333     OPCODE(TEND);
5334     OPCODE(BYTE_MASK);
5335     OPCODE(ROTATE_MASK);
5336     OPCODE(REPLICATE);
5337     OPCODE(JOIN_DWORDS);
5338     OPCODE(SPLAT);
5339     OPCODE(MERGE_HIGH);
5340     OPCODE(MERGE_LOW);
5341     OPCODE(SHL_DOUBLE);
5342     OPCODE(PERMUTE_DWORDS);
5343     OPCODE(PERMUTE);
5344     OPCODE(PACK);
5345     OPCODE(PACKS_CC);
5346     OPCODE(PACKLS_CC);
5347     OPCODE(UNPACK_HIGH);
5348     OPCODE(UNPACKL_HIGH);
5349     OPCODE(UNPACK_LOW);
5350     OPCODE(UNPACKL_LOW);
5351     OPCODE(VSHL_BY_SCALAR);
5352     OPCODE(VSRL_BY_SCALAR);
5353     OPCODE(VSRA_BY_SCALAR);
5354     OPCODE(VSUM);
5355     OPCODE(VICMPE);
5356     OPCODE(VICMPH);
5357     OPCODE(VICMPHL);
5358     OPCODE(VICMPES);
5359     OPCODE(VICMPHS);
5360     OPCODE(VICMPHLS);
5361     OPCODE(VFCMPE);
5362     OPCODE(STRICT_VFCMPE);
5363     OPCODE(STRICT_VFCMPES);
5364     OPCODE(VFCMPH);
5365     OPCODE(STRICT_VFCMPH);
5366     OPCODE(STRICT_VFCMPHS);
5367     OPCODE(VFCMPHE);
5368     OPCODE(STRICT_VFCMPHE);
5369     OPCODE(STRICT_VFCMPHES);
5370     OPCODE(VFCMPES);
5371     OPCODE(VFCMPHS);
5372     OPCODE(VFCMPHES);
5373     OPCODE(VFTCI);
5374     OPCODE(VEXTEND);
5375     OPCODE(STRICT_VEXTEND);
5376     OPCODE(VROUND);
5377     OPCODE(VTM);
5378     OPCODE(VFAE_CC);
5379     OPCODE(VFAEZ_CC);
5380     OPCODE(VFEE_CC);
5381     OPCODE(VFEEZ_CC);
5382     OPCODE(VFENE_CC);
5383     OPCODE(VFENEZ_CC);
5384     OPCODE(VISTR_CC);
5385     OPCODE(VSTRC_CC);
5386     OPCODE(VSTRCZ_CC);
5387     OPCODE(VSTRS_CC);
5388     OPCODE(VSTRSZ_CC);
5389     OPCODE(TDC);
5390     OPCODE(ATOMIC_SWAPW);
5391     OPCODE(ATOMIC_LOADW_ADD);
5392     OPCODE(ATOMIC_LOADW_SUB);
5393     OPCODE(ATOMIC_LOADW_AND);
5394     OPCODE(ATOMIC_LOADW_OR);
5395     OPCODE(ATOMIC_LOADW_XOR);
5396     OPCODE(ATOMIC_LOADW_NAND);
5397     OPCODE(ATOMIC_LOADW_MIN);
5398     OPCODE(ATOMIC_LOADW_MAX);
5399     OPCODE(ATOMIC_LOADW_UMIN);
5400     OPCODE(ATOMIC_LOADW_UMAX);
5401     OPCODE(ATOMIC_CMP_SWAPW);
5402     OPCODE(ATOMIC_CMP_SWAP);
5403     OPCODE(ATOMIC_LOAD_128);
5404     OPCODE(ATOMIC_STORE_128);
5405     OPCODE(ATOMIC_CMP_SWAP_128);
5406     OPCODE(LRV);
5407     OPCODE(STRV);
5408     OPCODE(VLER);
5409     OPCODE(VSTER);
5410     OPCODE(PREFETCH);
5411   }
5412   return nullptr;
5413 #undef OPCODE
5414 }
5415 
5416 // Return true if VT is a vector whose elements are a whole number of bytes
5417 // in width. Also check for presence of vector support.
5418 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
5419   if (!Subtarget.hasVector())
5420     return false;
5421 
5422   return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
5423 }
5424 
5425 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
5426 // producing a result of type ResVT.  Op is a possibly bitcast version
5427 // of the input vector and Index is the index (based on type VecVT) that
5428 // should be extracted.  Return the new extraction if a simplification
5429 // was possible or if Force is true.
5430 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
5431                                               EVT VecVT, SDValue Op,
5432                                               unsigned Index,
5433                                               DAGCombinerInfo &DCI,
5434                                               bool Force) const {
5435   SelectionDAG &DAG = DCI.DAG;
5436 
5437   // The number of bytes being extracted.
5438   unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5439 
5440   for (;;) {
5441     unsigned Opcode = Op.getOpcode();
5442     if (Opcode == ISD::BITCAST)
5443       // Look through bitcasts.
5444       Op = Op.getOperand(0);
5445     else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
5446              canTreatAsByteVector(Op.getValueType())) {
5447       // Get a VPERM-like permute mask and see whether the bytes covered
5448       // by the extracted element are a contiguous sequence from one
5449       // source operand.
5450       SmallVector<int, SystemZ::VectorBytes> Bytes;
5451       if (!getVPermMask(Op, Bytes))
5452         break;
5453       int First;
5454       if (!getShuffleInput(Bytes, Index * BytesPerElement,
5455                            BytesPerElement, First))
5456         break;
5457       if (First < 0)
5458         return DAG.getUNDEF(ResVT);
5459       // Make sure the contiguous sequence starts at a multiple of the
5460       // original element size.
5461       unsigned Byte = unsigned(First) % Bytes.size();
5462       if (Byte % BytesPerElement != 0)
5463         break;
5464       // We can get the extracted value directly from an input.
5465       Index = Byte / BytesPerElement;
5466       Op = Op.getOperand(unsigned(First) / Bytes.size());
5467       Force = true;
5468     } else if (Opcode == ISD::BUILD_VECTOR &&
5469                canTreatAsByteVector(Op.getValueType())) {
5470       // We can only optimize this case if the BUILD_VECTOR elements are
5471       // at least as wide as the extracted value.
5472       EVT OpVT = Op.getValueType();
5473       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5474       if (OpBytesPerElement < BytesPerElement)
5475         break;
5476       // Make sure that the least-significant bit of the extracted value
5477       // is the least significant bit of an input.
5478       unsigned End = (Index + 1) * BytesPerElement;
5479       if (End % OpBytesPerElement != 0)
5480         break;
5481       // We're extracting the low part of one operand of the BUILD_VECTOR.
5482       Op = Op.getOperand(End / OpBytesPerElement - 1);
5483       if (!Op.getValueType().isInteger()) {
5484         EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
5485         Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5486         DCI.AddToWorklist(Op.getNode());
5487       }
5488       EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
5489       Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5490       if (VT != ResVT) {
5491         DCI.AddToWorklist(Op.getNode());
5492         Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
5493       }
5494       return Op;
5495     } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5496                 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
5497                 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5498                canTreatAsByteVector(Op.getValueType()) &&
5499                canTreatAsByteVector(Op.getOperand(0).getValueType())) {
5500       // Make sure that only the unextended bits are significant.
5501       EVT ExtVT = Op.getValueType();
5502       EVT OpVT = Op.getOperand(0).getValueType();
5503       unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
5504       unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5505       unsigned Byte = Index * BytesPerElement;
5506       unsigned SubByte = Byte % ExtBytesPerElement;
5507       unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
5508       if (SubByte < MinSubByte ||
5509           SubByte + BytesPerElement > ExtBytesPerElement)
5510         break;
5511       // Get the byte offset of the unextended element
5512       Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
5513       // ...then add the byte offset relative to that element.
5514       Byte += SubByte - MinSubByte;
5515       if (Byte % BytesPerElement != 0)
5516         break;
5517       Op = Op.getOperand(0);
5518       Index = Byte / BytesPerElement;
5519       Force = true;
5520     } else
5521       break;
5522   }
5523   if (Force) {
5524     if (Op.getValueType() != VecVT) {
5525       Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
5526       DCI.AddToWorklist(Op.getNode());
5527     }
5528     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
5529                        DAG.getConstant(Index, DL, MVT::i32));
5530   }
5531   return SDValue();
5532 }
5533 
5534 // Optimize vector operations in scalar value Op on the basis that Op
5535 // is truncated to TruncVT.
5536 SDValue SystemZTargetLowering::combineTruncateExtract(
5537     const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
5538   // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
5539   // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
5540   // of type TruncVT.
5541   if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5542       TruncVT.getSizeInBits() % 8 == 0) {
5543     SDValue Vec = Op.getOperand(0);
5544     EVT VecVT = Vec.getValueType();
5545     if (canTreatAsByteVector(VecVT)) {
5546       if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
5547         unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5548         unsigned TruncBytes = TruncVT.getStoreSize();
5549         if (BytesPerElement % TruncBytes == 0) {
5550           // Calculate the value of Y' in the above description.  We are
5551           // splitting the original elements into Scale equal-sized pieces
5552           // and for truncation purposes want the last (least-significant)
5553           // of these pieces for IndexN.  This is easiest to do by calculating
5554           // the start index of the following element and then subtracting 1.
5555           unsigned Scale = BytesPerElement / TruncBytes;
5556           unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
5557 
5558           // Defer the creation of the bitcast from X to combineExtract,
5559           // which might be able to optimize the extraction.
5560           VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
5561                                    VecVT.getStoreSize() / TruncBytes);
5562           EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
5563           return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
5564         }
5565       }
5566     }
5567   }
5568   return SDValue();
5569 }
5570 
5571 SDValue SystemZTargetLowering::combineZERO_EXTEND(
5572     SDNode *N, DAGCombinerInfo &DCI) const {
5573   // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
5574   SelectionDAG &DAG = DCI.DAG;
5575   SDValue N0 = N->getOperand(0);
5576   EVT VT = N->getValueType(0);
5577   if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
5578     auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
5579     auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5580     if (TrueOp && FalseOp) {
5581       SDLoc DL(N0);
5582       SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
5583                         DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
5584                         N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
5585       SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
5586       // If N0 has multiple uses, change other uses as well.
5587       if (!N0.hasOneUse()) {
5588         SDValue TruncSelect =
5589           DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
5590         DCI.CombineTo(N0.getNode(), TruncSelect);
5591       }
5592       return NewSelect;
5593     }
5594   }
5595   return SDValue();
5596 }
5597 
5598 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
5599     SDNode *N, DAGCombinerInfo &DCI) const {
5600   // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
5601   // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
5602   // into (select_cc LHS, RHS, -1, 0, COND)
5603   SelectionDAG &DAG = DCI.DAG;
5604   SDValue N0 = N->getOperand(0);
5605   EVT VT = N->getValueType(0);
5606   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
5607   if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
5608     N0 = N0.getOperand(0);
5609   if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
5610     SDLoc DL(N0);
5611     SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
5612                       DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
5613                       N0.getOperand(2) };
5614     return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
5615   }
5616   return SDValue();
5617 }
5618 
5619 SDValue SystemZTargetLowering::combineSIGN_EXTEND(
5620     SDNode *N, DAGCombinerInfo &DCI) const {
5621   // Convert (sext (ashr (shl X, C1), C2)) to
5622   // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
5623   // cheap as narrower ones.
5624   SelectionDAG &DAG = DCI.DAG;
5625   SDValue N0 = N->getOperand(0);
5626   EVT VT = N->getValueType(0);
5627   if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
5628     auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5629     SDValue Inner = N0.getOperand(0);
5630     if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
5631       if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
5632         unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
5633         unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
5634         unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
5635         EVT ShiftVT = N0.getOperand(1).getValueType();
5636         SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
5637                                   Inner.getOperand(0));
5638         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
5639                                   DAG.getConstant(NewShlAmt, SDLoc(Inner),
5640                                                   ShiftVT));
5641         return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
5642                            DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
5643       }
5644     }
5645   }
5646   return SDValue();
5647 }
5648 
5649 SDValue SystemZTargetLowering::combineMERGE(
5650     SDNode *N, DAGCombinerInfo &DCI) const {
5651   SelectionDAG &DAG = DCI.DAG;
5652   unsigned Opcode = N->getOpcode();
5653   SDValue Op0 = N->getOperand(0);
5654   SDValue Op1 = N->getOperand(1);
5655   if (Op0.getOpcode() == ISD::BITCAST)
5656     Op0 = Op0.getOperand(0);
5657   if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5658     // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
5659     // for v4f32.
5660     if (Op1 == N->getOperand(0))
5661       return Op1;
5662     // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
5663     EVT VT = Op1.getValueType();
5664     unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
5665     if (ElemBytes <= 4) {
5666       Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
5667                 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
5668       EVT InVT = VT.changeVectorElementTypeToInteger();
5669       EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
5670                                    SystemZ::VectorBytes / ElemBytes / 2);
5671       if (VT != InVT) {
5672         Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
5673         DCI.AddToWorklist(Op1.getNode());
5674       }
5675       SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
5676       DCI.AddToWorklist(Op.getNode());
5677       return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
5678     }
5679   }
5680   return SDValue();
5681 }
5682 
5683 SDValue SystemZTargetLowering::combineLOAD(
5684     SDNode *N, DAGCombinerInfo &DCI) const {
5685   SelectionDAG &DAG = DCI.DAG;
5686   EVT LdVT = N->getValueType(0);
5687   if (LdVT.isVector() || LdVT.isInteger())
5688     return SDValue();
5689   // Transform a scalar load that is REPLICATEd as well as having other
5690   // use(s) to the form where the other use(s) use the first element of the
5691   // REPLICATE instead of the load. Otherwise instruction selection will not
5692   // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
5693   // point loads.
5694 
5695   SDValue Replicate;
5696   SmallVector<SDNode*, 8> OtherUses;
5697   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5698        UI != UE; ++UI) {
5699     if (UI->getOpcode() == SystemZISD::REPLICATE) {
5700       if (Replicate)
5701         return SDValue(); // Should never happen
5702       Replicate = SDValue(*UI, 0);
5703     }
5704     else if (UI.getUse().getResNo() == 0)
5705       OtherUses.push_back(*UI);
5706   }
5707   if (!Replicate || OtherUses.empty())
5708     return SDValue();
5709 
5710   SDLoc DL(N);
5711   SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
5712                               Replicate, DAG.getConstant(0, DL, MVT::i32));
5713   // Update uses of the loaded Value while preserving old chains.
5714   for (SDNode *U : OtherUses) {
5715     SmallVector<SDValue, 8> Ops;
5716     for (SDValue Op : U->ops())
5717       Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
5718     DAG.UpdateNodeOperands(U, Ops);
5719   }
5720   return SDValue(N, 0);
5721 }
5722 
5723 bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
5724   if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
5725     return true;
5726   if (Subtarget.hasVectorEnhancements2())
5727     if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
5728       return true;
5729   return false;
5730 }
5731 
5732 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
5733   if (!VT.isVector() || !VT.isSimple() ||
5734       VT.getSizeInBits() != 128 ||
5735       VT.getScalarSizeInBits() % 8 != 0)
5736     return false;
5737 
5738   unsigned NumElts = VT.getVectorNumElements();
5739   for (unsigned i = 0; i < NumElts; ++i) {
5740     if (M[i] < 0) continue; // ignore UNDEF indices
5741     if ((unsigned) M[i] != NumElts - 1 - i)
5742       return false;
5743   }
5744 
5745   return true;
5746 }
5747 
5748 SDValue SystemZTargetLowering::combineSTORE(
5749     SDNode *N, DAGCombinerInfo &DCI) const {
5750   SelectionDAG &DAG = DCI.DAG;
5751   auto *SN = cast<StoreSDNode>(N);
5752   auto &Op1 = N->getOperand(1);
5753   EVT MemVT = SN->getMemoryVT();
5754   // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
5755   // for the extraction to be done on a vMiN value, so that we can use VSTE.
5756   // If X has wider elements then convert it to:
5757   // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
5758   if (MemVT.isInteger() && SN->isTruncatingStore()) {
5759     if (SDValue Value =
5760             combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
5761       DCI.AddToWorklist(Value.getNode());
5762 
5763       // Rewrite the store with the new form of stored value.
5764       return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
5765                                SN->getBasePtr(), SN->getMemoryVT(),
5766                                SN->getMemOperand());
5767     }
5768   }
5769   // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
5770   if (!SN->isTruncatingStore() &&
5771       Op1.getOpcode() == ISD::BSWAP &&
5772       Op1.getNode()->hasOneUse() &&
5773       canLoadStoreByteSwapped(Op1.getValueType())) {
5774 
5775       SDValue BSwapOp = Op1.getOperand(0);
5776 
5777       if (BSwapOp.getValueType() == MVT::i16)
5778         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
5779 
5780       SDValue Ops[] = {
5781         N->getOperand(0), BSwapOp, N->getOperand(2)
5782       };
5783 
5784       return
5785         DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
5786                                 Ops, MemVT, SN->getMemOperand());
5787     }
5788   // Combine STORE (element-swap) into VSTER
5789   if (!SN->isTruncatingStore() &&
5790       Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
5791       Op1.getNode()->hasOneUse() &&
5792       Subtarget.hasVectorEnhancements2()) {
5793     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
5794     ArrayRef<int> ShuffleMask = SVN->getMask();
5795     if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
5796       SDValue Ops[] = {
5797         N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
5798       };
5799 
5800       return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
5801                                      DAG.getVTList(MVT::Other),
5802                                      Ops, MemVT, SN->getMemOperand());
5803     }
5804   }
5805 
5806   return SDValue();
5807 }
5808 
5809 SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
5810     SDNode *N, DAGCombinerInfo &DCI) const {
5811   SelectionDAG &DAG = DCI.DAG;
5812   // Combine element-swap (LOAD) into VLER
5813   if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
5814       N->getOperand(0).hasOneUse() &&
5815       Subtarget.hasVectorEnhancements2()) {
5816     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5817     ArrayRef<int> ShuffleMask = SVN->getMask();
5818     if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
5819       SDValue Load = N->getOperand(0);
5820       LoadSDNode *LD = cast<LoadSDNode>(Load);
5821 
5822       // Create the element-swapping load.
5823       SDValue Ops[] = {
5824         LD->getChain(),    // Chain
5825         LD->getBasePtr()   // Ptr
5826       };
5827       SDValue ESLoad =
5828         DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
5829                                 DAG.getVTList(LD->getValueType(0), MVT::Other),
5830                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
5831 
5832       // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
5833       // by the load dead.
5834       DCI.CombineTo(N, ESLoad);
5835 
5836       // Next, combine the load away, we give it a bogus result value but a real
5837       // chain result.  The result value is dead because the shuffle is dead.
5838       DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
5839 
5840       // Return N so it doesn't get rechecked!
5841       return SDValue(N, 0);
5842     }
5843   }
5844 
5845   return SDValue();
5846 }
5847 
5848 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
5849     SDNode *N, DAGCombinerInfo &DCI) const {
5850   SelectionDAG &DAG = DCI.DAG;
5851 
5852   if (!Subtarget.hasVector())
5853     return SDValue();
5854 
5855   // Look through bitcasts that retain the number of vector elements.
5856   SDValue Op = N->getOperand(0);
5857   if (Op.getOpcode() == ISD::BITCAST &&
5858       Op.getValueType().isVector() &&
5859       Op.getOperand(0).getValueType().isVector() &&
5860       Op.getValueType().getVectorNumElements() ==
5861       Op.getOperand(0).getValueType().getVectorNumElements())
5862     Op = Op.getOperand(0);
5863 
5864   // Pull BSWAP out of a vector extraction.
5865   if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
5866     EVT VecVT = Op.getValueType();
5867     EVT EltVT = VecVT.getVectorElementType();
5868     Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
5869                      Op.getOperand(0), N->getOperand(1));
5870     DCI.AddToWorklist(Op.getNode());
5871     Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
5872     if (EltVT != N->getValueType(0)) {
5873       DCI.AddToWorklist(Op.getNode());
5874       Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
5875     }
5876     return Op;
5877   }
5878 
5879   // Try to simplify a vector extraction.
5880   if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
5881     SDValue Op0 = N->getOperand(0);
5882     EVT VecVT = Op0.getValueType();
5883     return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
5884                           IndexN->getZExtValue(), DCI, false);
5885   }
5886   return SDValue();
5887 }
5888 
5889 SDValue SystemZTargetLowering::combineJOIN_DWORDS(
5890     SDNode *N, DAGCombinerInfo &DCI) const {
5891   SelectionDAG &DAG = DCI.DAG;
5892   // (join_dwords X, X) == (replicate X)
5893   if (N->getOperand(0) == N->getOperand(1))
5894     return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
5895                        N->getOperand(0));
5896   return SDValue();
5897 }
5898 
5899 SDValue SystemZTargetLowering::combineFP_ROUND(
5900     SDNode *N, DAGCombinerInfo &DCI) const {
5901 
5902   if (!Subtarget.hasVector())
5903     return SDValue();
5904 
5905   // (fpround (extract_vector_elt X 0))
5906   // (fpround (extract_vector_elt X 1)) ->
5907   // (extract_vector_elt (VROUND X) 0)
5908   // (extract_vector_elt (VROUND X) 2)
5909   //
5910   // This is a special case since the target doesn't really support v2f32s.
5911   SelectionDAG &DAG = DCI.DAG;
5912   SDValue Op0 = N->getOperand(0);
5913   if (N->getValueType(0) == MVT::f32 &&
5914       Op0.hasOneUse() &&
5915       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5916       Op0.getOperand(0).getValueType() == MVT::v2f64 &&
5917       Op0.getOperand(1).getOpcode() == ISD::Constant &&
5918       cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
5919     SDValue Vec = Op0.getOperand(0);
5920     for (auto *U : Vec->uses()) {
5921       if (U != Op0.getNode() &&
5922           U->hasOneUse() &&
5923           U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5924           U->getOperand(0) == Vec &&
5925           U->getOperand(1).getOpcode() == ISD::Constant &&
5926           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
5927         SDValue OtherRound = SDValue(*U->use_begin(), 0);
5928         if (OtherRound.getOpcode() == ISD::FP_ROUND &&
5929             OtherRound.getOperand(0) == SDValue(U, 0) &&
5930             OtherRound.getValueType() == MVT::f32) {
5931           SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
5932                                        MVT::v4f32, Vec);
5933           DCI.AddToWorklist(VRound.getNode());
5934           SDValue Extract1 =
5935             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
5936                         VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
5937           DCI.AddToWorklist(Extract1.getNode());
5938           DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
5939           SDValue Extract0 =
5940             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
5941                         VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
5942           return Extract0;
5943         }
5944       }
5945     }
5946   }
5947   return SDValue();
5948 }
5949 
5950 SDValue SystemZTargetLowering::combineFP_EXTEND(
5951     SDNode *N, DAGCombinerInfo &DCI) const {
5952 
5953   if (!Subtarget.hasVector())
5954     return SDValue();
5955 
5956   // (fpextend (extract_vector_elt X 0))
5957   // (fpextend (extract_vector_elt X 2)) ->
5958   // (extract_vector_elt (VEXTEND X) 0)
5959   // (extract_vector_elt (VEXTEND X) 1)
5960   //
5961   // This is a special case since the target doesn't really support v2f32s.
5962   SelectionDAG &DAG = DCI.DAG;
5963   SDValue Op0 = N->getOperand(0);
5964   if (N->getValueType(0) == MVT::f64 &&
5965       Op0.hasOneUse() &&
5966       Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5967       Op0.getOperand(0).getValueType() == MVT::v4f32 &&
5968       Op0.getOperand(1).getOpcode() == ISD::Constant &&
5969       cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
5970     SDValue Vec = Op0.getOperand(0);
5971     for (auto *U : Vec->uses()) {
5972       if (U != Op0.getNode() &&
5973           U->hasOneUse() &&
5974           U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5975           U->getOperand(0) == Vec &&
5976           U->getOperand(1).getOpcode() == ISD::Constant &&
5977           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
5978         SDValue OtherExtend = SDValue(*U->use_begin(), 0);
5979         if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
5980             OtherExtend.getOperand(0) == SDValue(U, 0) &&
5981             OtherExtend.getValueType() == MVT::f64) {
5982           SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
5983                                         MVT::v2f64, Vec);
5984           DCI.AddToWorklist(VExtend.getNode());
5985           SDValue Extract1 =
5986             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
5987                         VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
5988           DCI.AddToWorklist(Extract1.getNode());
5989           DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
5990           SDValue Extract0 =
5991             DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
5992                         VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
5993           return Extract0;
5994         }
5995       }
5996     }
5997   }
5998   return SDValue();
5999 }
6000 
6001 SDValue SystemZTargetLowering::combineBSWAP(
6002     SDNode *N, DAGCombinerInfo &DCI) const {
6003   SelectionDAG &DAG = DCI.DAG;
6004   // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
6005   if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6006       N->getOperand(0).hasOneUse() &&
6007       canLoadStoreByteSwapped(N->getValueType(0))) {
6008       SDValue Load = N->getOperand(0);
6009       LoadSDNode *LD = cast<LoadSDNode>(Load);
6010 
6011       // Create the byte-swapping load.
6012       SDValue Ops[] = {
6013         LD->getChain(),    // Chain
6014         LD->getBasePtr()   // Ptr
6015       };
6016       EVT LoadVT = N->getValueType(0);
6017       if (LoadVT == MVT::i16)
6018         LoadVT = MVT::i32;
6019       SDValue BSLoad =
6020         DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
6021                                 DAG.getVTList(LoadVT, MVT::Other),
6022                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
6023 
6024       // If this is an i16 load, insert the truncate.
6025       SDValue ResVal = BSLoad;
6026       if (N->getValueType(0) == MVT::i16)
6027         ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
6028 
6029       // First, combine the bswap away.  This makes the value produced by the
6030       // load dead.
6031       DCI.CombineTo(N, ResVal);
6032 
6033       // Next, combine the load away, we give it a bogus result value but a real
6034       // chain result.  The result value is dead because the bswap is dead.
6035       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
6036 
6037       // Return N so it doesn't get rechecked!
6038       return SDValue(N, 0);
6039     }
6040 
6041   // Look through bitcasts that retain the number of vector elements.
6042   SDValue Op = N->getOperand(0);
6043   if (Op.getOpcode() == ISD::BITCAST &&
6044       Op.getValueType().isVector() &&
6045       Op.getOperand(0).getValueType().isVector() &&
6046       Op.getValueType().getVectorNumElements() ==
6047       Op.getOperand(0).getValueType().getVectorNumElements())
6048     Op = Op.getOperand(0);
6049 
6050   // Push BSWAP into a vector insertion if at least one side then simplifies.
6051   if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
6052     SDValue Vec = Op.getOperand(0);
6053     SDValue Elt = Op.getOperand(1);
6054     SDValue Idx = Op.getOperand(2);
6055 
6056     if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
6057         Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
6058         DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
6059         Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
6060         (canLoadStoreByteSwapped(N->getValueType(0)) &&
6061          ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
6062       EVT VecVT = N->getValueType(0);
6063       EVT EltVT = N->getValueType(0).getVectorElementType();
6064       if (VecVT != Vec.getValueType()) {
6065         Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
6066         DCI.AddToWorklist(Vec.getNode());
6067       }
6068       if (EltVT != Elt.getValueType()) {
6069         Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
6070         DCI.AddToWorklist(Elt.getNode());
6071       }
6072       Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
6073       DCI.AddToWorklist(Vec.getNode());
6074       Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
6075       DCI.AddToWorklist(Elt.getNode());
6076       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
6077                          Vec, Elt, Idx);
6078     }
6079   }
6080 
6081   // Push BSWAP into a vector shuffle if at least one side then simplifies.
6082   ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
6083   if (SV && Op.hasOneUse()) {
6084     SDValue Op0 = Op.getOperand(0);
6085     SDValue Op1 = Op.getOperand(1);
6086 
6087     if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
6088         Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
6089         DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
6090         Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
6091       EVT VecVT = N->getValueType(0);
6092       if (VecVT != Op0.getValueType()) {
6093         Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
6094         DCI.AddToWorklist(Op0.getNode());
6095       }
6096       if (VecVT != Op1.getValueType()) {
6097         Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
6098         DCI.AddToWorklist(Op1.getNode());
6099       }
6100       Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
6101       DCI.AddToWorklist(Op0.getNode());
6102       Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
6103       DCI.AddToWorklist(Op1.getNode());
6104       return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
6105     }
6106   }
6107 
6108   return SDValue();
6109 }
6110 
6111 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
6112   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
6113   // set by the CCReg instruction using the CCValid / CCMask masks,
6114   // If the CCReg instruction is itself a ICMP testing the condition
6115   // code set by some other instruction, see whether we can directly
6116   // use that condition code.
6117 
6118   // Verify that we have an ICMP against some constant.
6119   if (CCValid != SystemZ::CCMASK_ICMP)
6120     return false;
6121   auto *ICmp = CCReg.getNode();
6122   if (ICmp->getOpcode() != SystemZISD::ICMP)
6123     return false;
6124   auto *CompareLHS = ICmp->getOperand(0).getNode();
6125   auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
6126   if (!CompareRHS)
6127     return false;
6128 
6129   // Optimize the case where CompareLHS is a SELECT_CCMASK.
6130   if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
6131     // Verify that we have an appropriate mask for a EQ or NE comparison.
6132     bool Invert = false;
6133     if (CCMask == SystemZ::CCMASK_CMP_NE)
6134       Invert = !Invert;
6135     else if (CCMask != SystemZ::CCMASK_CMP_EQ)
6136       return false;
6137 
6138     // Verify that the ICMP compares against one of select values.
6139     auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
6140     if (!TrueVal)
6141       return false;
6142     auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6143     if (!FalseVal)
6144       return false;
6145     if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
6146       Invert = !Invert;
6147     else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
6148       return false;
6149 
6150     // Compute the effective CC mask for the new branch or select.
6151     auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
6152     auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
6153     if (!NewCCValid || !NewCCMask)
6154       return false;
6155     CCValid = NewCCValid->getZExtValue();
6156     CCMask = NewCCMask->getZExtValue();
6157     if (Invert)
6158       CCMask ^= CCValid;
6159 
6160     // Return the updated CCReg link.
6161     CCReg = CompareLHS->getOperand(4);
6162     return true;
6163   }
6164 
6165   // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
6166   if (CompareLHS->getOpcode() == ISD::SRA) {
6167     auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6168     if (!SRACount || SRACount->getZExtValue() != 30)
6169       return false;
6170     auto *SHL = CompareLHS->getOperand(0).getNode();
6171     if (SHL->getOpcode() != ISD::SHL)
6172       return false;
6173     auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
6174     if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
6175       return false;
6176     auto *IPM = SHL->getOperand(0).getNode();
6177     if (IPM->getOpcode() != SystemZISD::IPM)
6178       return false;
6179 
6180     // Avoid introducing CC spills (because SRA would clobber CC).
6181     if (!CompareLHS->hasOneUse())
6182       return false;
6183     // Verify that the ICMP compares against zero.
6184     if (CompareRHS->getZExtValue() != 0)
6185       return false;
6186 
6187     // Compute the effective CC mask for the new branch or select.
6188     switch (CCMask) {
6189     case SystemZ::CCMASK_CMP_EQ: break;
6190     case SystemZ::CCMASK_CMP_NE: break;
6191     case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
6192     case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
6193     case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
6194     case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
6195     default: return false;
6196     }
6197 
6198     // Return the updated CCReg link.
6199     CCReg = IPM->getOperand(0);
6200     return true;
6201   }
6202 
6203   return false;
6204 }
6205 
6206 SDValue SystemZTargetLowering::combineBR_CCMASK(
6207     SDNode *N, DAGCombinerInfo &DCI) const {
6208   SelectionDAG &DAG = DCI.DAG;
6209 
6210   // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
6211   auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
6212   auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
6213   if (!CCValid || !CCMask)
6214     return SDValue();
6215 
6216   int CCValidVal = CCValid->getZExtValue();
6217   int CCMaskVal = CCMask->getZExtValue();
6218   SDValue Chain = N->getOperand(0);
6219   SDValue CCReg = N->getOperand(4);
6220 
6221   if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
6222     return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
6223                        Chain,
6224                        DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
6225                        DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
6226                        N->getOperand(3), CCReg);
6227   return SDValue();
6228 }
6229 
6230 SDValue SystemZTargetLowering::combineSELECT_CCMASK(
6231     SDNode *N, DAGCombinerInfo &DCI) const {
6232   SelectionDAG &DAG = DCI.DAG;
6233 
6234   // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
6235   auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
6236   auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
6237   if (!CCValid || !CCMask)
6238     return SDValue();
6239 
6240   int CCValidVal = CCValid->getZExtValue();
6241   int CCMaskVal = CCMask->getZExtValue();
6242   SDValue CCReg = N->getOperand(4);
6243 
6244   if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
6245     return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
6246                        N->getOperand(0), N->getOperand(1),
6247                        DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
6248                        DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
6249                        CCReg);
6250   return SDValue();
6251 }
6252 
6253 
6254 SDValue SystemZTargetLowering::combineGET_CCMASK(
6255     SDNode *N, DAGCombinerInfo &DCI) const {
6256 
6257   // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
6258   auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
6259   auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
6260   if (!CCValid || !CCMask)
6261     return SDValue();
6262   int CCValidVal = CCValid->getZExtValue();
6263   int CCMaskVal = CCMask->getZExtValue();
6264 
6265   SDValue Select = N->getOperand(0);
6266   if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
6267     return SDValue();
6268 
6269   auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
6270   auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
6271   if (!SelectCCValid || !SelectCCMask)
6272     return SDValue();
6273   int SelectCCValidVal = SelectCCValid->getZExtValue();
6274   int SelectCCMaskVal = SelectCCMask->getZExtValue();
6275 
6276   auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
6277   auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
6278   if (!TrueVal || !FalseVal)
6279     return SDValue();
6280   if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
6281     ;
6282   else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
6283     SelectCCMaskVal ^= SelectCCValidVal;
6284   else
6285     return SDValue();
6286 
6287   if (SelectCCValidVal & ~CCValidVal)
6288     return SDValue();
6289   if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
6290     return SDValue();
6291 
6292   return Select->getOperand(4);
6293 }
6294 
6295 SDValue SystemZTargetLowering::combineIntDIVREM(
6296     SDNode *N, DAGCombinerInfo &DCI) const {
6297   SelectionDAG &DAG = DCI.DAG;
6298   EVT VT = N->getValueType(0);
6299   // In the case where the divisor is a vector of constants a cheaper
6300   // sequence of instructions can replace the divide. BuildSDIV is called to
6301   // do this during DAG combining, but it only succeeds when it can build a
6302   // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
6303   // since it is not Legal but Custom it can only happen before
6304   // legalization. Therefore we must scalarize this early before Combine
6305   // 1. For widened vectors, this is already the result of type legalization.
6306   if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
6307       DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
6308     return DAG.UnrollVectorOp(N);
6309   return SDValue();
6310 }
6311 
6312 SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
6313   if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
6314     return N->getOperand(0);
6315   return N;
6316 }
6317 
6318 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
6319                                                  DAGCombinerInfo &DCI) const {
6320   switch(N->getOpcode()) {
6321   default: break;
6322   case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
6323   case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
6324   case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
6325   case SystemZISD::MERGE_HIGH:
6326   case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
6327   case ISD::LOAD:               return combineLOAD(N, DCI);
6328   case ISD::STORE:              return combineSTORE(N, DCI);
6329   case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
6330   case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
6331   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
6332   case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
6333   case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
6334   case ISD::BSWAP:              return combineBSWAP(N, DCI);
6335   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
6336   case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
6337   case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
6338   case ISD::SDIV:
6339   case ISD::UDIV:
6340   case ISD::SREM:
6341   case ISD::UREM:               return combineIntDIVREM(N, DCI);
6342   }
6343 
6344   return SDValue();
6345 }
6346 
6347 // Return the demanded elements for the OpNo source operand of Op. DemandedElts
6348 // are for Op.
6349 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
6350                                     unsigned OpNo) {
6351   EVT VT = Op.getValueType();
6352   unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
6353   APInt SrcDemE;
6354   unsigned Opcode = Op.getOpcode();
6355   if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
6356     unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6357     switch (Id) {
6358     case Intrinsic::s390_vpksh:   // PACKS
6359     case Intrinsic::s390_vpksf:
6360     case Intrinsic::s390_vpksg:
6361     case Intrinsic::s390_vpkshs:  // PACKS_CC
6362     case Intrinsic::s390_vpksfs:
6363     case Intrinsic::s390_vpksgs:
6364     case Intrinsic::s390_vpklsh:  // PACKLS
6365     case Intrinsic::s390_vpklsf:
6366     case Intrinsic::s390_vpklsg:
6367     case Intrinsic::s390_vpklshs: // PACKLS_CC
6368     case Intrinsic::s390_vpklsfs:
6369     case Intrinsic::s390_vpklsgs:
6370       // VECTOR PACK truncates the elements of two source vectors into one.
6371       SrcDemE = DemandedElts;
6372       if (OpNo == 2)
6373         SrcDemE.lshrInPlace(NumElts / 2);
6374       SrcDemE = SrcDemE.trunc(NumElts / 2);
6375       break;
6376       // VECTOR UNPACK extends half the elements of the source vector.
6377     case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
6378     case Intrinsic::s390_vuphh:
6379     case Intrinsic::s390_vuphf:
6380     case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
6381     case Intrinsic::s390_vuplhh:
6382     case Intrinsic::s390_vuplhf:
6383       SrcDemE = APInt(NumElts * 2, 0);
6384       SrcDemE.insertBits(DemandedElts, 0);
6385       break;
6386     case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
6387     case Intrinsic::s390_vuplhw:
6388     case Intrinsic::s390_vuplf:
6389     case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
6390     case Intrinsic::s390_vupllh:
6391     case Intrinsic::s390_vupllf:
6392       SrcDemE = APInt(NumElts * 2, 0);
6393       SrcDemE.insertBits(DemandedElts, NumElts);
6394       break;
6395     case Intrinsic::s390_vpdi: {
6396       // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
6397       SrcDemE = APInt(NumElts, 0);
6398       if (!DemandedElts[OpNo - 1])
6399         break;
6400       unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
6401       unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
6402       // Demand input element 0 or 1, given by the mask bit value.
6403       SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
6404       break;
6405     }
6406     case Intrinsic::s390_vsldb: {
6407       // VECTOR SHIFT LEFT DOUBLE BY BYTE
6408       assert(VT == MVT::v16i8 && "Unexpected type.");
6409       unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
6410       assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
6411       unsigned NumSrc0Els = 16 - FirstIdx;
6412       SrcDemE = APInt(NumElts, 0);
6413       if (OpNo == 1) {
6414         APInt DemEls = DemandedElts.trunc(NumSrc0Els);
6415         SrcDemE.insertBits(DemEls, FirstIdx);
6416       } else {
6417         APInt DemEls = DemandedElts.lshr(NumSrc0Els);
6418         SrcDemE.insertBits(DemEls, 0);
6419       }
6420       break;
6421     }
6422     case Intrinsic::s390_vperm:
6423       SrcDemE = APInt(NumElts, 1);
6424       break;
6425     default:
6426       llvm_unreachable("Unhandled intrinsic.");
6427       break;
6428     }
6429   } else {
6430     switch (Opcode) {
6431     case SystemZISD::JOIN_DWORDS:
6432       // Scalar operand.
6433       SrcDemE = APInt(1, 1);
6434       break;
6435     case SystemZISD::SELECT_CCMASK:
6436       SrcDemE = DemandedElts;
6437       break;
6438     default:
6439       llvm_unreachable("Unhandled opcode.");
6440       break;
6441     }
6442   }
6443   return SrcDemE;
6444 }
6445 
6446 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
6447                                   const APInt &DemandedElts,
6448                                   const SelectionDAG &DAG, unsigned Depth,
6449                                   unsigned OpNo) {
6450   APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
6451   APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
6452   KnownBits LHSKnown =
6453       DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
6454   KnownBits RHSKnown =
6455       DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
6456   Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
6457   Known.One = LHSKnown.One & RHSKnown.One;
6458 }
6459 
6460 void
6461 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
6462                                                      KnownBits &Known,
6463                                                      const APInt &DemandedElts,
6464                                                      const SelectionDAG &DAG,
6465                                                      unsigned Depth) const {
6466   Known.resetAll();
6467 
6468   // Intrinsic CC result is returned in the two low bits.
6469   unsigned tmp0, tmp1; // not used
6470   if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
6471     Known.Zero.setBitsFrom(2);
6472     return;
6473   }
6474   EVT VT = Op.getValueType();
6475   if (Op.getResNo() != 0 || VT == MVT::Untyped)
6476     return;
6477   assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
6478           "KnownBits does not match VT in bitwidth");
6479   assert ((!VT.isVector() ||
6480            (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
6481           "DemandedElts does not match VT number of elements");
6482   unsigned BitWidth = Known.getBitWidth();
6483   unsigned Opcode = Op.getOpcode();
6484   if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
6485     bool IsLogical = false;
6486     unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6487     switch (Id) {
6488     case Intrinsic::s390_vpksh:   // PACKS
6489     case Intrinsic::s390_vpksf:
6490     case Intrinsic::s390_vpksg:
6491     case Intrinsic::s390_vpkshs:  // PACKS_CC
6492     case Intrinsic::s390_vpksfs:
6493     case Intrinsic::s390_vpksgs:
6494     case Intrinsic::s390_vpklsh:  // PACKLS
6495     case Intrinsic::s390_vpklsf:
6496     case Intrinsic::s390_vpklsg:
6497     case Intrinsic::s390_vpklshs: // PACKLS_CC
6498     case Intrinsic::s390_vpklsfs:
6499     case Intrinsic::s390_vpklsgs:
6500     case Intrinsic::s390_vpdi:
6501     case Intrinsic::s390_vsldb:
6502     case Intrinsic::s390_vperm:
6503       computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
6504       break;
6505     case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
6506     case Intrinsic::s390_vuplhh:
6507     case Intrinsic::s390_vuplhf:
6508     case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
6509     case Intrinsic::s390_vupllh:
6510     case Intrinsic::s390_vupllf:
6511       IsLogical = true;
6512       LLVM_FALLTHROUGH;
6513     case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
6514     case Intrinsic::s390_vuphh:
6515     case Intrinsic::s390_vuphf:
6516     case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
6517     case Intrinsic::s390_vuplhw:
6518     case Intrinsic::s390_vuplf: {
6519       SDValue SrcOp = Op.getOperand(1);
6520       APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
6521       Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
6522       if (IsLogical) {
6523         Known = Known.zext(BitWidth, true);
6524       } else
6525         Known = Known.sext(BitWidth);
6526       break;
6527     }
6528     default:
6529       break;
6530     }
6531   } else {
6532     switch (Opcode) {
6533     case SystemZISD::JOIN_DWORDS:
6534     case SystemZISD::SELECT_CCMASK:
6535       computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
6536       break;
6537     case SystemZISD::REPLICATE: {
6538       SDValue SrcOp = Op.getOperand(0);
6539       Known = DAG.computeKnownBits(SrcOp, Depth + 1);
6540       if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
6541         Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
6542       break;
6543     }
6544     default:
6545       break;
6546     }
6547   }
6548 
6549   // Known has the width of the source operand(s). Adjust if needed to match
6550   // the passed bitwidth.
6551   if (Known.getBitWidth() != BitWidth)
6552     Known = Known.zextOrTrunc(BitWidth, false);
6553 }
6554 
6555 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
6556                                         const SelectionDAG &DAG, unsigned Depth,
6557                                         unsigned OpNo) {
6558   APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
6559   unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
6560   if (LHS == 1) return 1; // Early out.
6561   APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
6562   unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
6563   if (RHS == 1) return 1; // Early out.
6564   unsigned Common = std::min(LHS, RHS);
6565   unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
6566   EVT VT = Op.getValueType();
6567   unsigned VTBits = VT.getScalarSizeInBits();
6568   if (SrcBitWidth > VTBits) { // PACK
6569     unsigned SrcExtraBits = SrcBitWidth - VTBits;
6570     if (Common > SrcExtraBits)
6571       return (Common - SrcExtraBits);
6572     return 1;
6573   }
6574   assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
6575   return Common;
6576 }
6577 
6578 unsigned
6579 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
6580     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
6581     unsigned Depth) const {
6582   if (Op.getResNo() != 0)
6583     return 1;
6584   unsigned Opcode = Op.getOpcode();
6585   if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
6586     unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6587     switch (Id) {
6588     case Intrinsic::s390_vpksh:   // PACKS
6589     case Intrinsic::s390_vpksf:
6590     case Intrinsic::s390_vpksg:
6591     case Intrinsic::s390_vpkshs:  // PACKS_CC
6592     case Intrinsic::s390_vpksfs:
6593     case Intrinsic::s390_vpksgs:
6594     case Intrinsic::s390_vpklsh:  // PACKLS
6595     case Intrinsic::s390_vpklsf:
6596     case Intrinsic::s390_vpklsg:
6597     case Intrinsic::s390_vpklshs: // PACKLS_CC
6598     case Intrinsic::s390_vpklsfs:
6599     case Intrinsic::s390_vpklsgs:
6600     case Intrinsic::s390_vpdi:
6601     case Intrinsic::s390_vsldb:
6602     case Intrinsic::s390_vperm:
6603       return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
6604     case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
6605     case Intrinsic::s390_vuphh:
6606     case Intrinsic::s390_vuphf:
6607     case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
6608     case Intrinsic::s390_vuplhw:
6609     case Intrinsic::s390_vuplf: {
6610       SDValue PackedOp = Op.getOperand(1);
6611       APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
6612       unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
6613       EVT VT = Op.getValueType();
6614       unsigned VTBits = VT.getScalarSizeInBits();
6615       Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
6616       return Tmp;
6617     }
6618     default:
6619       break;
6620     }
6621   } else {
6622     switch (Opcode) {
6623     case SystemZISD::SELECT_CCMASK:
6624       return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
6625     default:
6626       break;
6627     }
6628   }
6629 
6630   return 1;
6631 }
6632 
6633 //===----------------------------------------------------------------------===//
6634 // Custom insertion
6635 //===----------------------------------------------------------------------===//
6636 
6637 // Create a new basic block after MBB.
6638 static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
6639   MachineFunction &MF = *MBB->getParent();
6640   MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
6641   MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
6642   return NewMBB;
6643 }
6644 
6645 // Split MBB after MI and return the new block (the one that contains
6646 // instructions after MI).
6647 static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
6648                                           MachineBasicBlock *MBB) {
6649   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
6650   NewMBB->splice(NewMBB->begin(), MBB,
6651                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
6652   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
6653   return NewMBB;
6654 }
6655 
6656 // Split MBB before MI and return the new block (the one that contains MI).
6657 static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
6658                                            MachineBasicBlock *MBB) {
6659   MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
6660   NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
6661   NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
6662   return NewMBB;
6663 }
6664 
6665 // Force base value Base into a register before MI.  Return the register.
6666 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
6667                          const SystemZInstrInfo *TII) {
6668   if (Base.isReg())
6669     return Base.getReg();
6670 
6671   MachineBasicBlock *MBB = MI.getParent();
6672   MachineFunction &MF = *MBB->getParent();
6673   MachineRegisterInfo &MRI = MF.getRegInfo();
6674 
6675   Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
6676   BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
6677       .add(Base)
6678       .addImm(0)
6679       .addReg(0);
6680   return Reg;
6681 }
6682 
6683 // The CC operand of MI might be missing a kill marker because there
6684 // were multiple uses of CC, and ISel didn't know which to mark.
6685 // Figure out whether MI should have had a kill marker.
6686 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
6687   // Scan forward through BB for a use/def of CC.
6688   MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
6689   for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
6690     const MachineInstr& mi = *miI;
6691     if (mi.readsRegister(SystemZ::CC))
6692       return false;
6693     if (mi.definesRegister(SystemZ::CC))
6694       break; // Should have kill-flag - update below.
6695   }
6696 
6697   // If we hit the end of the block, check whether CC is live into a
6698   // successor.
6699   if (miI == MBB->end()) {
6700     for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
6701       if ((*SI)->isLiveIn(SystemZ::CC))
6702         return false;
6703   }
6704 
6705   return true;
6706 }
6707 
6708 // Return true if it is OK for this Select pseudo-opcode to be cascaded
6709 // together with other Select pseudo-opcodes into a single basic-block with
6710 // a conditional jump around it.
6711 static bool isSelectPseudo(MachineInstr &MI) {
6712   switch (MI.getOpcode()) {
6713   case SystemZ::Select32:
6714   case SystemZ::Select64:
6715   case SystemZ::SelectF32:
6716   case SystemZ::SelectF64:
6717   case SystemZ::SelectF128:
6718   case SystemZ::SelectVR32:
6719   case SystemZ::SelectVR64:
6720   case SystemZ::SelectVR128:
6721     return true;
6722 
6723   default:
6724     return false;
6725   }
6726 }
6727 
6728 // Helper function, which inserts PHI functions into SinkMBB:
6729 //   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
6730 // where %FalseValue(i) and %TrueValue(i) are taken from Selects.
6731 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
6732                                  MachineBasicBlock *TrueMBB,
6733                                  MachineBasicBlock *FalseMBB,
6734                                  MachineBasicBlock *SinkMBB) {
6735   MachineFunction *MF = TrueMBB->getParent();
6736   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
6737 
6738   MachineInstr *FirstMI = Selects.front();
6739   unsigned CCValid = FirstMI->getOperand(3).getImm();
6740   unsigned CCMask = FirstMI->getOperand(4).getImm();
6741 
6742   MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
6743 
6744   // As we are creating the PHIs, we have to be careful if there is more than
6745   // one.  Later Selects may reference the results of earlier Selects, but later
6746   // PHIs have to reference the individual true/false inputs from earlier PHIs.
6747   // That also means that PHI construction must work forward from earlier to
6748   // later, and that the code must maintain a mapping from earlier PHI's
6749   // destination registers, and the registers that went into the PHI.
6750   DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
6751 
6752   for (auto MI : Selects) {
6753     Register DestReg = MI->getOperand(0).getReg();
6754     Register TrueReg = MI->getOperand(1).getReg();
6755     Register FalseReg = MI->getOperand(2).getReg();
6756 
6757     // If this Select we are generating is the opposite condition from
6758     // the jump we generated, then we have to swap the operands for the
6759     // PHI that is going to be generated.
6760     if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
6761       std::swap(TrueReg, FalseReg);
6762 
6763     if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
6764       TrueReg = RegRewriteTable[TrueReg].first;
6765 
6766     if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
6767       FalseReg = RegRewriteTable[FalseReg].second;
6768 
6769     DebugLoc DL = MI->getDebugLoc();
6770     BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
6771       .addReg(TrueReg).addMBB(TrueMBB)
6772       .addReg(FalseReg).addMBB(FalseMBB);
6773 
6774     // Add this PHI to the rewrite table.
6775     RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
6776   }
6777 
6778   MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
6779 }
6780 
6781 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
6782 MachineBasicBlock *
6783 SystemZTargetLowering::emitSelect(MachineInstr &MI,
6784                                   MachineBasicBlock *MBB) const {
6785   assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
6786   const SystemZInstrInfo *TII =
6787       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6788 
6789   unsigned CCValid = MI.getOperand(3).getImm();
6790   unsigned CCMask = MI.getOperand(4).getImm();
6791 
6792   // If we have a sequence of Select* pseudo instructions using the
6793   // same condition code value, we want to expand all of them into
6794   // a single pair of basic blocks using the same condition.
6795   SmallVector<MachineInstr*, 8> Selects;
6796   SmallVector<MachineInstr*, 8> DbgValues;
6797   Selects.push_back(&MI);
6798   unsigned Count = 0;
6799   for (MachineBasicBlock::iterator NextMIIt =
6800          std::next(MachineBasicBlock::iterator(MI));
6801        NextMIIt != MBB->end(); ++NextMIIt) {
6802     if (NextMIIt->definesRegister(SystemZ::CC))
6803       break;
6804     if (isSelectPseudo(*NextMIIt)) {
6805       assert(NextMIIt->getOperand(3).getImm() == CCValid &&
6806              "Bad CCValid operands since CC was not redefined.");
6807       if (NextMIIt->getOperand(4).getImm() == CCMask ||
6808           NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) {
6809         Selects.push_back(&*NextMIIt);
6810         continue;
6811       }
6812       break;
6813     }
6814     bool User = false;
6815     for (auto SelMI : Selects)
6816       if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
6817         User = true;
6818         break;
6819       }
6820     if (NextMIIt->isDebugInstr()) {
6821       if (User) {
6822         assert(NextMIIt->isDebugValue() && "Unhandled debug opcode.");
6823         DbgValues.push_back(&*NextMIIt);
6824       }
6825     }
6826     else if (User || ++Count > 20)
6827       break;
6828   }
6829 
6830   MachineInstr *LastMI = Selects.back();
6831   bool CCKilled =
6832       (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
6833   MachineBasicBlock *StartMBB = MBB;
6834   MachineBasicBlock *JoinMBB  = splitBlockAfter(LastMI, MBB);
6835   MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
6836 
6837   // Unless CC was killed in the last Select instruction, mark it as
6838   // live-in to both FalseMBB and JoinMBB.
6839   if (!CCKilled) {
6840     FalseMBB->addLiveIn(SystemZ::CC);
6841     JoinMBB->addLiveIn(SystemZ::CC);
6842   }
6843 
6844   //  StartMBB:
6845   //   BRC CCMask, JoinMBB
6846   //   # fallthrough to FalseMBB
6847   MBB = StartMBB;
6848   BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
6849     .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
6850   MBB->addSuccessor(JoinMBB);
6851   MBB->addSuccessor(FalseMBB);
6852 
6853   //  FalseMBB:
6854   //   # fallthrough to JoinMBB
6855   MBB = FalseMBB;
6856   MBB->addSuccessor(JoinMBB);
6857 
6858   //  JoinMBB:
6859   //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
6860   //  ...
6861   MBB = JoinMBB;
6862   createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
6863   for (auto SelMI : Selects)
6864     SelMI->eraseFromParent();
6865 
6866   MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
6867   for (auto DbgMI : DbgValues)
6868     MBB->splice(InsertPos, StartMBB, DbgMI);
6869 
6870   return JoinMBB;
6871 }
6872 
6873 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
6874 // StoreOpcode is the store to use and Invert says whether the store should
6875 // happen when the condition is false rather than true.  If a STORE ON
6876 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
6877 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
6878                                                         MachineBasicBlock *MBB,
6879                                                         unsigned StoreOpcode,
6880                                                         unsigned STOCOpcode,
6881                                                         bool Invert) const {
6882   const SystemZInstrInfo *TII =
6883       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6884 
6885   Register SrcReg = MI.getOperand(0).getReg();
6886   MachineOperand Base = MI.getOperand(1);
6887   int64_t Disp = MI.getOperand(2).getImm();
6888   Register IndexReg = MI.getOperand(3).getReg();
6889   unsigned CCValid = MI.getOperand(4).getImm();
6890   unsigned CCMask = MI.getOperand(5).getImm();
6891   DebugLoc DL = MI.getDebugLoc();
6892 
6893   StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
6894 
6895   // Use STOCOpcode if possible.  We could use different store patterns in
6896   // order to avoid matching the index register, but the performance trade-offs
6897   // might be more complicated in that case.
6898   if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
6899     if (Invert)
6900       CCMask ^= CCValid;
6901 
6902     // ISel pattern matching also adds a load memory operand of the same
6903     // address, so take special care to find the storing memory operand.
6904     MachineMemOperand *MMO = nullptr;
6905     for (auto *I : MI.memoperands())
6906       if (I->isStore()) {
6907           MMO = I;
6908           break;
6909         }
6910 
6911     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
6912       .addReg(SrcReg)
6913       .add(Base)
6914       .addImm(Disp)
6915       .addImm(CCValid)
6916       .addImm(CCMask)
6917       .addMemOperand(MMO);
6918 
6919     MI.eraseFromParent();
6920     return MBB;
6921   }
6922 
6923   // Get the condition needed to branch around the store.
6924   if (!Invert)
6925     CCMask ^= CCValid;
6926 
6927   MachineBasicBlock *StartMBB = MBB;
6928   MachineBasicBlock *JoinMBB  = splitBlockBefore(MI, MBB);
6929   MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
6930 
6931   // Unless CC was killed in the CondStore instruction, mark it as
6932   // live-in to both FalseMBB and JoinMBB.
6933   if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
6934     FalseMBB->addLiveIn(SystemZ::CC);
6935     JoinMBB->addLiveIn(SystemZ::CC);
6936   }
6937 
6938   //  StartMBB:
6939   //   BRC CCMask, JoinMBB
6940   //   # fallthrough to FalseMBB
6941   MBB = StartMBB;
6942   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6943     .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
6944   MBB->addSuccessor(JoinMBB);
6945   MBB->addSuccessor(FalseMBB);
6946 
6947   //  FalseMBB:
6948   //   store %SrcReg, %Disp(%Index,%Base)
6949   //   # fallthrough to JoinMBB
6950   MBB = FalseMBB;
6951   BuildMI(MBB, DL, TII->get(StoreOpcode))
6952       .addReg(SrcReg)
6953       .add(Base)
6954       .addImm(Disp)
6955       .addReg(IndexReg);
6956   MBB->addSuccessor(JoinMBB);
6957 
6958   MI.eraseFromParent();
6959   return JoinMBB;
6960 }
6961 
6962 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
6963 // or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
6964 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
6965 // BitSize is the width of the field in bits, or 0 if this is a partword
6966 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
6967 // is one of the operands.  Invert says whether the field should be
6968 // inverted after performing BinOpcode (e.g. for NAND).
6969 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
6970     MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
6971     unsigned BitSize, bool Invert) const {
6972   MachineFunction &MF = *MBB->getParent();
6973   const SystemZInstrInfo *TII =
6974       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6975   MachineRegisterInfo &MRI = MF.getRegInfo();
6976   bool IsSubWord = (BitSize < 32);
6977 
6978   // Extract the operands.  Base can be a register or a frame index.
6979   // Src2 can be a register or immediate.
6980   Register Dest = MI.getOperand(0).getReg();
6981   MachineOperand Base = earlyUseOperand(MI.getOperand(1));
6982   int64_t Disp = MI.getOperand(2).getImm();
6983   MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
6984   Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
6985   Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
6986   DebugLoc DL = MI.getDebugLoc();
6987   if (IsSubWord)
6988     BitSize = MI.getOperand(6).getImm();
6989 
6990   // Subword operations use 32-bit registers.
6991   const TargetRegisterClass *RC = (BitSize <= 32 ?
6992                                    &SystemZ::GR32BitRegClass :
6993                                    &SystemZ::GR64BitRegClass);
6994   unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
6995   unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
6996 
6997   // Get the right opcodes for the displacement.
6998   LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
6999   CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7000   assert(LOpcode && CSOpcode && "Displacement out of range");
7001 
7002   // Create virtual registers for temporary results.
7003   Register OrigVal       = MRI.createVirtualRegister(RC);
7004   Register OldVal        = MRI.createVirtualRegister(RC);
7005   Register NewVal        = (BinOpcode || IsSubWord ?
7006                             MRI.createVirtualRegister(RC) : Src2.getReg());
7007   Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7008   Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7009 
7010   // Insert a basic block for the main loop.
7011   MachineBasicBlock *StartMBB = MBB;
7012   MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
7013   MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
7014 
7015   //  StartMBB:
7016   //   ...
7017   //   %OrigVal = L Disp(%Base)
7018   //   # fall through to LoopMMB
7019   MBB = StartMBB;
7020   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7021   MBB->addSuccessor(LoopMBB);
7022 
7023   //  LoopMBB:
7024   //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
7025   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7026   //   %RotatedNewVal = OP %RotatedOldVal, %Src2
7027   //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
7028   //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
7029   //   JNE LoopMBB
7030   //   # fall through to DoneMMB
7031   MBB = LoopMBB;
7032   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7033     .addReg(OrigVal).addMBB(StartMBB)
7034     .addReg(Dest).addMBB(LoopMBB);
7035   if (IsSubWord)
7036     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7037       .addReg(OldVal).addReg(BitShift).addImm(0);
7038   if (Invert) {
7039     // Perform the operation normally and then invert every bit of the field.
7040     Register Tmp = MRI.createVirtualRegister(RC);
7041     BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
7042     if (BitSize <= 32)
7043       // XILF with the upper BitSize bits set.
7044       BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
7045         .addReg(Tmp).addImm(-1U << (32 - BitSize));
7046     else {
7047       // Use LCGR and add -1 to the result, which is more compact than
7048       // an XILF, XILH pair.
7049       Register Tmp2 = MRI.createVirtualRegister(RC);
7050       BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
7051       BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
7052         .addReg(Tmp2).addImm(-1);
7053     }
7054   } else if (BinOpcode)
7055     // A simply binary operation.
7056     BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
7057         .addReg(RotatedOldVal)
7058         .add(Src2);
7059   else if (IsSubWord)
7060     // Use RISBG to rotate Src2 into position and use it to replace the
7061     // field in RotatedOldVal.
7062     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
7063       .addReg(RotatedOldVal).addReg(Src2.getReg())
7064       .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
7065   if (IsSubWord)
7066     BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7067       .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7068   BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7069       .addReg(OldVal)
7070       .addReg(NewVal)
7071       .add(Base)
7072       .addImm(Disp);
7073   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7074     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7075   MBB->addSuccessor(LoopMBB);
7076   MBB->addSuccessor(DoneMBB);
7077 
7078   MI.eraseFromParent();
7079   return DoneMBB;
7080 }
7081 
7082 // Implement EmitInstrWithCustomInserter for pseudo
7083 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
7084 // instruction that should be used to compare the current field with the
7085 // minimum or maximum value.  KeepOldMask is the BRC condition-code mask
7086 // for when the current field should be kept.  BitSize is the width of
7087 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
7088 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
7089     MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
7090     unsigned KeepOldMask, unsigned BitSize) const {
7091   MachineFunction &MF = *MBB->getParent();
7092   const SystemZInstrInfo *TII =
7093       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7094   MachineRegisterInfo &MRI = MF.getRegInfo();
7095   bool IsSubWord = (BitSize < 32);
7096 
7097   // Extract the operands.  Base can be a register or a frame index.
7098   Register Dest = MI.getOperand(0).getReg();
7099   MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7100   int64_t Disp = MI.getOperand(2).getImm();
7101   Register Src2 = MI.getOperand(3).getReg();
7102   Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
7103   Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
7104   DebugLoc DL = MI.getDebugLoc();
7105   if (IsSubWord)
7106     BitSize = MI.getOperand(6).getImm();
7107 
7108   // Subword operations use 32-bit registers.
7109   const TargetRegisterClass *RC = (BitSize <= 32 ?
7110                                    &SystemZ::GR32BitRegClass :
7111                                    &SystemZ::GR64BitRegClass);
7112   unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
7113   unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7114 
7115   // Get the right opcodes for the displacement.
7116   LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
7117   CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7118   assert(LOpcode && CSOpcode && "Displacement out of range");
7119 
7120   // Create virtual registers for temporary results.
7121   Register OrigVal       = MRI.createVirtualRegister(RC);
7122   Register OldVal        = MRI.createVirtualRegister(RC);
7123   Register NewVal        = MRI.createVirtualRegister(RC);
7124   Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7125   Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
7126   Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7127 
7128   // Insert 3 basic blocks for the loop.
7129   MachineBasicBlock *StartMBB  = MBB;
7130   MachineBasicBlock *DoneMBB   = splitBlockBefore(MI, MBB);
7131   MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
7132   MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
7133   MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
7134 
7135   //  StartMBB:
7136   //   ...
7137   //   %OrigVal     = L Disp(%Base)
7138   //   # fall through to LoopMMB
7139   MBB = StartMBB;
7140   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7141   MBB->addSuccessor(LoopMBB);
7142 
7143   //  LoopMBB:
7144   //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
7145   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7146   //   CompareOpcode %RotatedOldVal, %Src2
7147   //   BRC KeepOldMask, UpdateMBB
7148   MBB = LoopMBB;
7149   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7150     .addReg(OrigVal).addMBB(StartMBB)
7151     .addReg(Dest).addMBB(UpdateMBB);
7152   if (IsSubWord)
7153     BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7154       .addReg(OldVal).addReg(BitShift).addImm(0);
7155   BuildMI(MBB, DL, TII->get(CompareOpcode))
7156     .addReg(RotatedOldVal).addReg(Src2);
7157   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7158     .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
7159   MBB->addSuccessor(UpdateMBB);
7160   MBB->addSuccessor(UseAltMBB);
7161 
7162   //  UseAltMBB:
7163   //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
7164   //   # fall through to UpdateMMB
7165   MBB = UseAltMBB;
7166   if (IsSubWord)
7167     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
7168       .addReg(RotatedOldVal).addReg(Src2)
7169       .addImm(32).addImm(31 + BitSize).addImm(0);
7170   MBB->addSuccessor(UpdateMBB);
7171 
7172   //  UpdateMBB:
7173   //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
7174   //                        [ %RotatedAltVal, UseAltMBB ]
7175   //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
7176   //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
7177   //   JNE LoopMBB
7178   //   # fall through to DoneMMB
7179   MBB = UpdateMBB;
7180   BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
7181     .addReg(RotatedOldVal).addMBB(LoopMBB)
7182     .addReg(RotatedAltVal).addMBB(UseAltMBB);
7183   if (IsSubWord)
7184     BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7185       .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7186   BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7187       .addReg(OldVal)
7188       .addReg(NewVal)
7189       .add(Base)
7190       .addImm(Disp);
7191   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7192     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7193   MBB->addSuccessor(LoopMBB);
7194   MBB->addSuccessor(DoneMBB);
7195 
7196   MI.eraseFromParent();
7197   return DoneMBB;
7198 }
7199 
7200 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
7201 // instruction MI.
7202 MachineBasicBlock *
7203 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
7204                                           MachineBasicBlock *MBB) const {
7205 
7206   MachineFunction &MF = *MBB->getParent();
7207   const SystemZInstrInfo *TII =
7208       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7209   MachineRegisterInfo &MRI = MF.getRegInfo();
7210 
7211   // Extract the operands.  Base can be a register or a frame index.
7212   Register Dest = MI.getOperand(0).getReg();
7213   MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7214   int64_t Disp = MI.getOperand(2).getImm();
7215   Register OrigCmpVal = MI.getOperand(3).getReg();
7216   Register OrigSwapVal = MI.getOperand(4).getReg();
7217   Register BitShift = MI.getOperand(5).getReg();
7218   Register NegBitShift = MI.getOperand(6).getReg();
7219   int64_t BitSize = MI.getOperand(7).getImm();
7220   DebugLoc DL = MI.getDebugLoc();
7221 
7222   const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
7223 
7224   // Get the right opcodes for the displacement.
7225   unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
7226   unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
7227   assert(LOpcode && CSOpcode && "Displacement out of range");
7228 
7229   // Create virtual registers for temporary results.
7230   Register OrigOldVal = MRI.createVirtualRegister(RC);
7231   Register OldVal = MRI.createVirtualRegister(RC);
7232   Register CmpVal = MRI.createVirtualRegister(RC);
7233   Register SwapVal = MRI.createVirtualRegister(RC);
7234   Register StoreVal = MRI.createVirtualRegister(RC);
7235   Register RetryOldVal = MRI.createVirtualRegister(RC);
7236   Register RetryCmpVal = MRI.createVirtualRegister(RC);
7237   Register RetrySwapVal = MRI.createVirtualRegister(RC);
7238 
7239   // Insert 2 basic blocks for the loop.
7240   MachineBasicBlock *StartMBB = MBB;
7241   MachineBasicBlock *DoneMBB  = splitBlockBefore(MI, MBB);
7242   MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
7243   MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);
7244 
7245   //  StartMBB:
7246   //   ...
7247   //   %OrigOldVal     = L Disp(%Base)
7248   //   # fall through to LoopMMB
7249   MBB = StartMBB;
7250   BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
7251       .add(Base)
7252       .addImm(Disp)
7253       .addReg(0);
7254   MBB->addSuccessor(LoopMBB);
7255 
7256   //  LoopMBB:
7257   //   %OldVal        = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
7258   //   %CmpVal        = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
7259   //   %SwapVal       = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
7260   //   %Dest          = RLL %OldVal, BitSize(%BitShift)
7261   //                      ^^ The low BitSize bits contain the field
7262   //                         of interest.
7263   //   %RetryCmpVal   = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
7264   //                      ^^ Replace the upper 32-BitSize bits of the
7265   //                         comparison value with those that we loaded,
7266   //                         so that we can use a full word comparison.
7267   //   CR %Dest, %RetryCmpVal
7268   //   JNE DoneMBB
7269   //   # Fall through to SetMBB
7270   MBB = LoopMBB;
7271   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7272     .addReg(OrigOldVal).addMBB(StartMBB)
7273     .addReg(RetryOldVal).addMBB(SetMBB);
7274   BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
7275     .addReg(OrigCmpVal).addMBB(StartMBB)
7276     .addReg(RetryCmpVal).addMBB(SetMBB);
7277   BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
7278     .addReg(OrigSwapVal).addMBB(StartMBB)
7279     .addReg(RetrySwapVal).addMBB(SetMBB);
7280   BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
7281     .addReg(OldVal).addReg(BitShift).addImm(BitSize);
7282   BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
7283     .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
7284   BuildMI(MBB, DL, TII->get(SystemZ::CR))
7285     .addReg(Dest).addReg(RetryCmpVal);
7286   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7287     .addImm(SystemZ::CCMASK_ICMP)
7288     .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
7289   MBB->addSuccessor(DoneMBB);
7290   MBB->addSuccessor(SetMBB);
7291 
7292   //  SetMBB:
7293   //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
7294   //                      ^^ Replace the upper 32-BitSize bits of the new
7295   //                         value with those that we loaded.
7296   //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
7297   //                      ^^ Rotate the new field to its proper position.
7298   //   %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
7299   //   JNE LoopMBB
7300   //   # fall through to ExitMMB
7301   MBB = SetMBB;
7302   BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
7303     .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
7304   BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
7305     .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
7306   BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
7307       .addReg(OldVal)
7308       .addReg(StoreVal)
7309       .add(Base)
7310       .addImm(Disp);
7311   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7312     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7313   MBB->addSuccessor(LoopMBB);
7314   MBB->addSuccessor(DoneMBB);
7315 
7316   // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
7317   // to the block after the loop.  At this point, CC may have been defined
7318   // either by the CR in LoopMBB or by the CS in SetMBB.
7319   if (!MI.registerDefIsDead(SystemZ::CC))
7320     DoneMBB->addLiveIn(SystemZ::CC);
7321 
7322   MI.eraseFromParent();
7323   return DoneMBB;
7324 }
7325 
7326 // Emit a move from two GR64s to a GR128.
7327 MachineBasicBlock *
7328 SystemZTargetLowering::emitPair128(MachineInstr &MI,
7329                                    MachineBasicBlock *MBB) const {
7330   MachineFunction &MF = *MBB->getParent();
7331   const SystemZInstrInfo *TII =
7332       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7333   MachineRegisterInfo &MRI = MF.getRegInfo();
7334   DebugLoc DL = MI.getDebugLoc();
7335 
7336   Register Dest = MI.getOperand(0).getReg();
7337   Register Hi = MI.getOperand(1).getReg();
7338   Register Lo = MI.getOperand(2).getReg();
7339   Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7340   Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7341 
7342   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
7343   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
7344     .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
7345   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
7346     .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
7347 
7348   MI.eraseFromParent();
7349   return MBB;
7350 }
7351 
7352 // Emit an extension from a GR64 to a GR128.  ClearEven is true
7353 // if the high register of the GR128 value must be cleared or false if
7354 // it's "don't care".
7355 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
7356                                                      MachineBasicBlock *MBB,
7357                                                      bool ClearEven) const {
7358   MachineFunction &MF = *MBB->getParent();
7359   const SystemZInstrInfo *TII =
7360       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7361   MachineRegisterInfo &MRI = MF.getRegInfo();
7362   DebugLoc DL = MI.getDebugLoc();
7363 
7364   Register Dest = MI.getOperand(0).getReg();
7365   Register Src = MI.getOperand(1).getReg();
7366   Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7367 
7368   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
7369   if (ClearEven) {
7370     Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7371     Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
7372 
7373     BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
7374       .addImm(0);
7375     BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
7376       .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
7377     In128 = NewIn128;
7378   }
7379   BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
7380     .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
7381 
7382   MI.eraseFromParent();
7383   return MBB;
7384 }
7385 
7386 MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
7387     MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
7388   MachineFunction &MF = *MBB->getParent();
7389   const SystemZInstrInfo *TII =
7390       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7391   MachineRegisterInfo &MRI = MF.getRegInfo();
7392   DebugLoc DL = MI.getDebugLoc();
7393 
7394   MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
7395   uint64_t DestDisp = MI.getOperand(1).getImm();
7396   MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
7397   uint64_t SrcDisp = MI.getOperand(3).getImm();
7398   uint64_t Length = MI.getOperand(4).getImm();
7399 
7400   // When generating more than one CLC, all but the last will need to
7401   // branch to the end when a difference is found.
7402   MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
7403                                splitBlockAfter(MI, MBB) : nullptr);
7404 
7405   // Check for the loop form, in which operand 5 is the trip count.
7406   if (MI.getNumExplicitOperands() > 5) {
7407     bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
7408 
7409     Register StartCountReg = MI.getOperand(5).getReg();
7410     Register StartSrcReg   = forceReg(MI, SrcBase, TII);
7411     Register StartDestReg  = (HaveSingleBase ? StartSrcReg :
7412                               forceReg(MI, DestBase, TII));
7413 
7414     const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
7415     Register ThisSrcReg  = MRI.createVirtualRegister(RC);
7416     Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
7417                             MRI.createVirtualRegister(RC));
7418     Register NextSrcReg  = MRI.createVirtualRegister(RC);
7419     Register NextDestReg = (HaveSingleBase ? NextSrcReg :
7420                             MRI.createVirtualRegister(RC));
7421 
7422     RC = &SystemZ::GR64BitRegClass;
7423     Register ThisCountReg = MRI.createVirtualRegister(RC);
7424     Register NextCountReg = MRI.createVirtualRegister(RC);
7425 
7426     MachineBasicBlock *StartMBB = MBB;
7427     MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
7428     MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
7429     MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
7430 
7431     //  StartMBB:
7432     //   # fall through to LoopMMB
7433     MBB->addSuccessor(LoopMBB);
7434 
7435     //  LoopMBB:
7436     //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
7437     //                      [ %NextDestReg, NextMBB ]
7438     //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
7439     //                     [ %NextSrcReg, NextMBB ]
7440     //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
7441     //                       [ %NextCountReg, NextMBB ]
7442     //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
7443     //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
7444     //   ( JLH EndMBB )
7445     //
7446     // The prefetch is used only for MVC.  The JLH is used only for CLC.
7447     MBB = LoopMBB;
7448 
7449     BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
7450       .addReg(StartDestReg).addMBB(StartMBB)
7451       .addReg(NextDestReg).addMBB(NextMBB);
7452     if (!HaveSingleBase)
7453       BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
7454         .addReg(StartSrcReg).addMBB(StartMBB)
7455         .addReg(NextSrcReg).addMBB(NextMBB);
7456     BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
7457       .addReg(StartCountReg).addMBB(StartMBB)
7458       .addReg(NextCountReg).addMBB(NextMBB);
7459     if (Opcode == SystemZ::MVC)
7460       BuildMI(MBB, DL, TII->get(SystemZ::PFD))
7461         .addImm(SystemZ::PFD_WRITE)
7462         .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
7463     BuildMI(MBB, DL, TII->get(Opcode))
7464       .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
7465       .addReg(ThisSrcReg).addImm(SrcDisp);
7466     if (EndMBB) {
7467       BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7468         .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
7469         .addMBB(EndMBB);
7470       MBB->addSuccessor(EndMBB);
7471       MBB->addSuccessor(NextMBB);
7472     }
7473 
7474     // NextMBB:
7475     //   %NextDestReg = LA 256(%ThisDestReg)
7476     //   %NextSrcReg = LA 256(%ThisSrcReg)
7477     //   %NextCountReg = AGHI %ThisCountReg, -1
7478     //   CGHI %NextCountReg, 0
7479     //   JLH LoopMBB
7480     //   # fall through to DoneMMB
7481     //
7482     // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
7483     MBB = NextMBB;
7484 
7485     BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
7486       .addReg(ThisDestReg).addImm(256).addReg(0);
7487     if (!HaveSingleBase)
7488       BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
7489         .addReg(ThisSrcReg).addImm(256).addReg(0);
7490     BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
7491       .addReg(ThisCountReg).addImm(-1);
7492     BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
7493       .addReg(NextCountReg).addImm(0);
7494     BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7495       .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
7496       .addMBB(LoopMBB);
7497     MBB->addSuccessor(LoopMBB);
7498     MBB->addSuccessor(DoneMBB);
7499 
7500     DestBase = MachineOperand::CreateReg(NextDestReg, false);
7501     SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
7502     Length &= 255;
7503     if (EndMBB && !Length)
7504       // If the loop handled the whole CLC range, DoneMBB will be empty with
7505       // CC live-through into EndMBB, so add it as live-in.
7506       DoneMBB->addLiveIn(SystemZ::CC);
7507     MBB = DoneMBB;
7508   }
7509   // Handle any remaining bytes with straight-line code.
7510   while (Length > 0) {
7511     uint64_t ThisLength = std::min(Length, uint64_t(256));
7512     // The previous iteration might have created out-of-range displacements.
7513     // Apply them using LAY if so.
7514     if (!isUInt<12>(DestDisp)) {
7515       Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7516       BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
7517           .add(DestBase)
7518           .addImm(DestDisp)
7519           .addReg(0);
7520       DestBase = MachineOperand::CreateReg(Reg, false);
7521       DestDisp = 0;
7522     }
7523     if (!isUInt<12>(SrcDisp)) {
7524       Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7525       BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
7526           .add(SrcBase)
7527           .addImm(SrcDisp)
7528           .addReg(0);
7529       SrcBase = MachineOperand::CreateReg(Reg, false);
7530       SrcDisp = 0;
7531     }
7532     BuildMI(*MBB, MI, DL, TII->get(Opcode))
7533         .add(DestBase)
7534         .addImm(DestDisp)
7535         .addImm(ThisLength)
7536         .add(SrcBase)
7537         .addImm(SrcDisp)
7538         .setMemRefs(MI.memoperands());
7539     DestDisp += ThisLength;
7540     SrcDisp += ThisLength;
7541     Length -= ThisLength;
7542     // If there's another CLC to go, branch to the end if a difference
7543     // was found.
7544     if (EndMBB && Length > 0) {
7545       MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
7546       BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7547         .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
7548         .addMBB(EndMBB);
7549       MBB->addSuccessor(EndMBB);
7550       MBB->addSuccessor(NextMBB);
7551       MBB = NextMBB;
7552     }
7553   }
7554   if (EndMBB) {
7555     MBB->addSuccessor(EndMBB);
7556     MBB = EndMBB;
7557     MBB->addLiveIn(SystemZ::CC);
7558   }
7559 
7560   MI.eraseFromParent();
7561   return MBB;
7562 }
7563 
7564 // Decompose string pseudo-instruction MI into a loop that continually performs
7565 // Opcode until CC != 3.
7566 MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
7567     MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
7568   MachineFunction &MF = *MBB->getParent();
7569   const SystemZInstrInfo *TII =
7570       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7571   MachineRegisterInfo &MRI = MF.getRegInfo();
7572   DebugLoc DL = MI.getDebugLoc();
7573 
7574   uint64_t End1Reg = MI.getOperand(0).getReg();
7575   uint64_t Start1Reg = MI.getOperand(1).getReg();
7576   uint64_t Start2Reg = MI.getOperand(2).getReg();
7577   uint64_t CharReg = MI.getOperand(3).getReg();
7578 
7579   const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
7580   uint64_t This1Reg = MRI.createVirtualRegister(RC);
7581   uint64_t This2Reg = MRI.createVirtualRegister(RC);
7582   uint64_t End2Reg  = MRI.createVirtualRegister(RC);
7583 
7584   MachineBasicBlock *StartMBB = MBB;
7585   MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
7586   MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
7587 
7588   //  StartMBB:
7589   //   # fall through to LoopMMB
7590   MBB->addSuccessor(LoopMBB);
7591 
7592   //  LoopMBB:
7593   //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
7594   //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
7595   //   R0L = %CharReg
7596   //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
7597   //   JO LoopMBB
7598   //   # fall through to DoneMMB
7599   //
7600   // The load of R0L can be hoisted by post-RA LICM.
7601   MBB = LoopMBB;
7602 
7603   BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
7604     .addReg(Start1Reg).addMBB(StartMBB)
7605     .addReg(End1Reg).addMBB(LoopMBB);
7606   BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
7607     .addReg(Start2Reg).addMBB(StartMBB)
7608     .addReg(End2Reg).addMBB(LoopMBB);
7609   BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
7610   BuildMI(MBB, DL, TII->get(Opcode))
7611     .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
7612     .addReg(This1Reg).addReg(This2Reg);
7613   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7614     .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
7615   MBB->addSuccessor(LoopMBB);
7616   MBB->addSuccessor(DoneMBB);
7617 
7618   DoneMBB->addLiveIn(SystemZ::CC);
7619 
7620   MI.eraseFromParent();
7621   return DoneMBB;
7622 }
7623 
7624 // Update TBEGIN instruction with final opcode and register clobbers.
7625 MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
7626     MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
7627     bool NoFloat) const {
7628   MachineFunction &MF = *MBB->getParent();
7629   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
7630   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7631 
7632   // Update opcode.
7633   MI.setDesc(TII->get(Opcode));
7634 
7635   // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
7636   // Make sure to add the corresponding GRSM bits if they are missing.
7637   uint64_t Control = MI.getOperand(2).getImm();
7638   static const unsigned GPRControlBit[16] = {
7639     0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
7640     0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
7641   };
7642   Control |= GPRControlBit[15];
7643   if (TFI->hasFP(MF))
7644     Control |= GPRControlBit[11];
7645   MI.getOperand(2).setImm(Control);
7646 
7647   // Add GPR clobbers.
7648   for (int I = 0; I < 16; I++) {
7649     if ((Control & GPRControlBit[I]) == 0) {
7650       unsigned Reg = SystemZMC::GR64Regs[I];
7651       MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
7652     }
7653   }
7654 
7655   // Add FPR/VR clobbers.
7656   if (!NoFloat && (Control & 4) != 0) {
7657     if (Subtarget.hasVector()) {
7658       for (int I = 0; I < 32; I++) {
7659         unsigned Reg = SystemZMC::VR128Regs[I];
7660         MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
7661       }
7662     } else {
7663       for (int I = 0; I < 16; I++) {
7664         unsigned Reg = SystemZMC::FP64Regs[I];
7665         MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
7666       }
7667     }
7668   }
7669 
7670   return MBB;
7671 }
7672 
7673 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
7674     MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
7675   MachineFunction &MF = *MBB->getParent();
7676   MachineRegisterInfo *MRI = &MF.getRegInfo();
7677   const SystemZInstrInfo *TII =
7678       static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7679   DebugLoc DL = MI.getDebugLoc();
7680 
7681   Register SrcReg = MI.getOperand(0).getReg();
7682 
7683   // Create new virtual register of the same class as source.
7684   const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
7685   Register DstReg = MRI->createVirtualRegister(RC);
7686 
7687   // Replace pseudo with a normal load-and-test that models the def as
7688   // well.
7689   BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
7690     .addReg(SrcReg)
7691     .setMIFlags(MI.getFlags());
7692   MI.eraseFromParent();
7693 
7694   return MBB;
7695 }
7696 
7697 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
7698     MachineInstr &MI, MachineBasicBlock *MBB) const {
7699   switch (MI.getOpcode()) {
7700   case SystemZ::Select32:
7701   case SystemZ::Select64:
7702   case SystemZ::SelectF32:
7703   case SystemZ::SelectF64:
7704   case SystemZ::SelectF128:
7705   case SystemZ::SelectVR32:
7706   case SystemZ::SelectVR64:
7707   case SystemZ::SelectVR128:
7708     return emitSelect(MI, MBB);
7709 
7710   case SystemZ::CondStore8Mux:
7711     return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
7712   case SystemZ::CondStore8MuxInv:
7713     return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
7714   case SystemZ::CondStore16Mux:
7715     return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
7716   case SystemZ::CondStore16MuxInv:
7717     return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
7718   case SystemZ::CondStore32Mux:
7719     return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
7720   case SystemZ::CondStore32MuxInv:
7721     return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
7722   case SystemZ::CondStore8:
7723     return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
7724   case SystemZ::CondStore8Inv:
7725     return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
7726   case SystemZ::CondStore16:
7727     return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
7728   case SystemZ::CondStore16Inv:
7729     return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
7730   case SystemZ::CondStore32:
7731     return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
7732   case SystemZ::CondStore32Inv:
7733     return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
7734   case SystemZ::CondStore64:
7735     return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
7736   case SystemZ::CondStore64Inv:
7737     return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
7738   case SystemZ::CondStoreF32:
7739     return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
7740   case SystemZ::CondStoreF32Inv:
7741     return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
7742   case SystemZ::CondStoreF64:
7743     return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
7744   case SystemZ::CondStoreF64Inv:
7745     return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
7746 
7747   case SystemZ::PAIR128:
7748     return emitPair128(MI, MBB);
7749   case SystemZ::AEXT128:
7750     return emitExt128(MI, MBB, false);
7751   case SystemZ::ZEXT128:
7752     return emitExt128(MI, MBB, true);
7753 
7754   case SystemZ::ATOMIC_SWAPW:
7755     return emitAtomicLoadBinary(MI, MBB, 0, 0);
7756   case SystemZ::ATOMIC_SWAP_32:
7757     return emitAtomicLoadBinary(MI, MBB, 0, 32);
7758   case SystemZ::ATOMIC_SWAP_64:
7759     return emitAtomicLoadBinary(MI, MBB, 0, 64);
7760 
7761   case SystemZ::ATOMIC_LOADW_AR:
7762     return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
7763   case SystemZ::ATOMIC_LOADW_AFI:
7764     return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
7765   case SystemZ::ATOMIC_LOAD_AR:
7766     return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
7767   case SystemZ::ATOMIC_LOAD_AHI:
7768     return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
7769   case SystemZ::ATOMIC_LOAD_AFI:
7770     return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
7771   case SystemZ::ATOMIC_LOAD_AGR:
7772     return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
7773   case SystemZ::ATOMIC_LOAD_AGHI:
7774     return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
7775   case SystemZ::ATOMIC_LOAD_AGFI:
7776     return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
7777 
7778   case SystemZ::ATOMIC_LOADW_SR:
7779     return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
7780   case SystemZ::ATOMIC_LOAD_SR:
7781     return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
7782   case SystemZ::ATOMIC_LOAD_SGR:
7783     return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
7784 
7785   case SystemZ::ATOMIC_LOADW_NR:
7786     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
7787   case SystemZ::ATOMIC_LOADW_NILH:
7788     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
7789   case SystemZ::ATOMIC_LOAD_NR:
7790     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
7791   case SystemZ::ATOMIC_LOAD_NILL:
7792     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
7793   case SystemZ::ATOMIC_LOAD_NILH:
7794     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
7795   case SystemZ::ATOMIC_LOAD_NILF:
7796     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
7797   case SystemZ::ATOMIC_LOAD_NGR:
7798     return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
7799   case SystemZ::ATOMIC_LOAD_NILL64:
7800     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
7801   case SystemZ::ATOMIC_LOAD_NILH64:
7802     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
7803   case SystemZ::ATOMIC_LOAD_NIHL64:
7804     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
7805   case SystemZ::ATOMIC_LOAD_NIHH64:
7806     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
7807   case SystemZ::ATOMIC_LOAD_NILF64:
7808     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
7809   case SystemZ::ATOMIC_LOAD_NIHF64:
7810     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
7811 
7812   case SystemZ::ATOMIC_LOADW_OR:
7813     return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
7814   case SystemZ::ATOMIC_LOADW_OILH:
7815     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
7816   case SystemZ::ATOMIC_LOAD_OR:
7817     return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
7818   case SystemZ::ATOMIC_LOAD_OILL:
7819     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
7820   case SystemZ::ATOMIC_LOAD_OILH:
7821     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
7822   case SystemZ::ATOMIC_LOAD_OILF:
7823     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
7824   case SystemZ::ATOMIC_LOAD_OGR:
7825     return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
7826   case SystemZ::ATOMIC_LOAD_OILL64:
7827     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
7828   case SystemZ::ATOMIC_LOAD_OILH64:
7829     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
7830   case SystemZ::ATOMIC_LOAD_OIHL64:
7831     return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
7832   case SystemZ::ATOMIC_LOAD_OIHH64:
7833     return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
7834   case SystemZ::ATOMIC_LOAD_OILF64:
7835     return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
7836   case SystemZ::ATOMIC_LOAD_OIHF64:
7837     return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
7838 
7839   case SystemZ::ATOMIC_LOADW_XR:
7840     return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
7841   case SystemZ::ATOMIC_LOADW_XILF:
7842     return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
7843   case SystemZ::ATOMIC_LOAD_XR:
7844     return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
7845   case SystemZ::ATOMIC_LOAD_XILF:
7846     return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
7847   case SystemZ::ATOMIC_LOAD_XGR:
7848     return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
7849   case SystemZ::ATOMIC_LOAD_XILF64:
7850     return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
7851   case SystemZ::ATOMIC_LOAD_XIHF64:
7852     return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
7853 
7854   case SystemZ::ATOMIC_LOADW_NRi:
7855     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
7856   case SystemZ::ATOMIC_LOADW_NILHi:
7857     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
7858   case SystemZ::ATOMIC_LOAD_NRi:
7859     return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
7860   case SystemZ::ATOMIC_LOAD_NILLi:
7861     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
7862   case SystemZ::ATOMIC_LOAD_NILHi:
7863     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
7864   case SystemZ::ATOMIC_LOAD_NILFi:
7865     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
7866   case SystemZ::ATOMIC_LOAD_NGRi:
7867     return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
7868   case SystemZ::ATOMIC_LOAD_NILL64i:
7869     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
7870   case SystemZ::ATOMIC_LOAD_NILH64i:
7871     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
7872   case SystemZ::ATOMIC_LOAD_NIHL64i:
7873     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
7874   case SystemZ::ATOMIC_LOAD_NIHH64i:
7875     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
7876   case SystemZ::ATOMIC_LOAD_NILF64i:
7877     return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
7878   case SystemZ::ATOMIC_LOAD_NIHF64i:
7879     return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
7880 
7881   case SystemZ::ATOMIC_LOADW_MIN:
7882     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7883                                 SystemZ::CCMASK_CMP_LE, 0);
7884   case SystemZ::ATOMIC_LOAD_MIN_32:
7885     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7886                                 SystemZ::CCMASK_CMP_LE, 32);
7887   case SystemZ::ATOMIC_LOAD_MIN_64:
7888     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
7889                                 SystemZ::CCMASK_CMP_LE, 64);
7890 
7891   case SystemZ::ATOMIC_LOADW_MAX:
7892     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7893                                 SystemZ::CCMASK_CMP_GE, 0);
7894   case SystemZ::ATOMIC_LOAD_MAX_32:
7895     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7896                                 SystemZ::CCMASK_CMP_GE, 32);
7897   case SystemZ::ATOMIC_LOAD_MAX_64:
7898     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
7899                                 SystemZ::CCMASK_CMP_GE, 64);
7900 
7901   case SystemZ::ATOMIC_LOADW_UMIN:
7902     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7903                                 SystemZ::CCMASK_CMP_LE, 0);
7904   case SystemZ::ATOMIC_LOAD_UMIN_32:
7905     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7906                                 SystemZ::CCMASK_CMP_LE, 32);
7907   case SystemZ::ATOMIC_LOAD_UMIN_64:
7908     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
7909                                 SystemZ::CCMASK_CMP_LE, 64);
7910 
7911   case SystemZ::ATOMIC_LOADW_UMAX:
7912     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7913                                 SystemZ::CCMASK_CMP_GE, 0);
7914   case SystemZ::ATOMIC_LOAD_UMAX_32:
7915     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7916                                 SystemZ::CCMASK_CMP_GE, 32);
7917   case SystemZ::ATOMIC_LOAD_UMAX_64:
7918     return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
7919                                 SystemZ::CCMASK_CMP_GE, 64);
7920 
7921   case SystemZ::ATOMIC_CMP_SWAPW:
7922     return emitAtomicCmpSwapW(MI, MBB);
7923   case SystemZ::MVCSequence:
7924   case SystemZ::MVCLoop:
7925     return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
7926   case SystemZ::NCSequence:
7927   case SystemZ::NCLoop:
7928     return emitMemMemWrapper(MI, MBB, SystemZ::NC);
7929   case SystemZ::OCSequence:
7930   case SystemZ::OCLoop:
7931     return emitMemMemWrapper(MI, MBB, SystemZ::OC);
7932   case SystemZ::XCSequence:
7933   case SystemZ::XCLoop:
7934     return emitMemMemWrapper(MI, MBB, SystemZ::XC);
7935   case SystemZ::CLCSequence:
7936   case SystemZ::CLCLoop:
7937     return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
7938   case SystemZ::CLSTLoop:
7939     return emitStringWrapper(MI, MBB, SystemZ::CLST);
7940   case SystemZ::MVSTLoop:
7941     return emitStringWrapper(MI, MBB, SystemZ::MVST);
7942   case SystemZ::SRSTLoop:
7943     return emitStringWrapper(MI, MBB, SystemZ::SRST);
7944   case SystemZ::TBEGIN:
7945     return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
7946   case SystemZ::TBEGIN_nofloat:
7947     return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
7948   case SystemZ::TBEGINC:
7949     return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
7950   case SystemZ::LTEBRCompare_VecPseudo:
7951     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
7952   case SystemZ::LTDBRCompare_VecPseudo:
7953     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
7954   case SystemZ::LTXBRCompare_VecPseudo:
7955     return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
7956 
7957   case TargetOpcode::STACKMAP:
7958   case TargetOpcode::PATCHPOINT:
7959     return emitPatchPoint(MI, MBB);
7960 
7961   default:
7962     llvm_unreachable("Unexpected instr type to insert");
7963   }
7964 }
7965 
7966 // This is only used by the isel schedulers, and is needed only to prevent
7967 // compiler from crashing when list-ilp is used.
7968 const TargetRegisterClass *
7969 SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
7970   if (VT == MVT::Untyped)
7971     return &SystemZ::ADDR128BitRegClass;
7972   return TargetLowering::getRepRegClassFor(VT);
7973 }
7974