1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that VE uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCExpr.h"
16 #include "VECustomDAG.h"
17 #include "VEInstrBuilder.h"
18 #include "VEMachineFunctionInfo.h"
19 #include "VERegisterInfo.h"
20 #include "VETargetMachine.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineModuleInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/SelectionDAG.h"
30 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/KnownBits.h"
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "ve-lower"
40 
41 //===----------------------------------------------------------------------===//
42 // Calling Convention Implementation
43 //===----------------------------------------------------------------------===//
44 
45 #include "VEGenCallingConv.inc"
46 
47 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48   switch (CallConv) {
49   default:
50     return RetCC_VE_C;
51   case CallingConv::Fast:
52     return RetCC_VE_Fast;
53   }
54 }
55 
56 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57   if (IsVarArg)
58     return CC_VE2;
59   switch (CallConv) {
60   default:
61     return CC_VE_C;
62   case CallingConv::Fast:
63     return CC_VE_Fast;
64   }
65 }
66 
67 bool VETargetLowering::CanLowerReturn(
68     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70   CCAssignFn *RetCC = getReturnCC(CallConv);
71   SmallVector<CCValAssign, 16> RVLocs;
72   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73   return CCInfo.CheckReturn(Outs, RetCC);
74 }
75 
76 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77                                    MVT::v256f32, MVT::v512f32, MVT::v256f64};
78 
79 static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80 
81 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82 
83 void VETargetLowering::initRegisterClasses() {
84   // Set up the register classes.
85   addRegisterClass(MVT::i32, &VE::I32RegClass);
86   addRegisterClass(MVT::i64, &VE::I64RegClass);
87   addRegisterClass(MVT::f32, &VE::F32RegClass);
88   addRegisterClass(MVT::f64, &VE::I64RegClass);
89   addRegisterClass(MVT::f128, &VE::F128RegClass);
90 
91   if (Subtarget->enableVPU()) {
92     for (MVT VecVT : AllVectorVTs)
93       addRegisterClass(VecVT, &VE::V64RegClass);
94     addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95     addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96   }
97 }
98 
99 void VETargetLowering::initSPUActions() {
100   const auto &TM = getTargetMachine();
101   /// Load & Store {
102 
103   // VE doesn't have i1 sign extending load.
104   for (MVT VT : MVT::integer_valuetypes()) {
105     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
106     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
107     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
108     setTruncStoreAction(VT, MVT::i1, Expand);
109   }
110 
111   // VE doesn't have floating point extload/truncstore, so expand them.
112   for (MVT FPVT : MVT::fp_valuetypes()) {
113     for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114       setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115       setTruncStoreAction(FPVT, OtherFPVT, Expand);
116     }
117   }
118 
119   // VE doesn't have fp128 load/store, so expand them in custom lower.
120   setOperationAction(ISD::LOAD, MVT::f128, Custom);
121   setOperationAction(ISD::STORE, MVT::f128, Custom);
122 
123   /// } Load & Store
124 
125   // Custom legalize address nodes into LO/HI parts.
126   MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
127   setOperationAction(ISD::BlockAddress, PtrVT, Custom);
128   setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
129   setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
130   setOperationAction(ISD::ConstantPool, PtrVT, Custom);
131   setOperationAction(ISD::JumpTable, PtrVT, Custom);
132 
133   /// VAARG handling {
134   setOperationAction(ISD::VASTART, MVT::Other, Custom);
135   // VAARG needs to be lowered to access with 8 bytes alignment.
136   setOperationAction(ISD::VAARG, MVT::Other, Custom);
137   // Use the default implementation.
138   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
139   setOperationAction(ISD::VAEND, MVT::Other, Expand);
140   /// } VAARG handling
141 
142   /// Stack {
143   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
144   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
145 
146   // Use the default implementation.
147   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
148   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
149   /// } Stack
150 
151   /// Branch {
152 
153   // VE doesn't have BRCOND
154   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
155 
156   // BR_JT is not implemented yet.
157   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158 
159   /// } Branch
160 
161   /// Int Ops {
162   for (MVT IntVT : {MVT::i32, MVT::i64}) {
163     // VE has no REM or DIVREM operations.
164     setOperationAction(ISD::UREM, IntVT, Expand);
165     setOperationAction(ISD::SREM, IntVT, Expand);
166     setOperationAction(ISD::SDIVREM, IntVT, Expand);
167     setOperationAction(ISD::UDIVREM, IntVT, Expand);
168 
169     // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170     setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
171     setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
172     setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
173 
174     // VE has no MULHU/S or U/SMUL_LOHI operations.
175     // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176     setOperationAction(ISD::MULHU, IntVT, Expand);
177     setOperationAction(ISD::MULHS, IntVT, Expand);
178     setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
179     setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
180 
181     // VE has no CTTZ, ROTL, ROTR operations.
182     setOperationAction(ISD::CTTZ, IntVT, Expand);
183     setOperationAction(ISD::ROTL, IntVT, Expand);
184     setOperationAction(ISD::ROTR, IntVT, Expand);
185 
186     // VE has 64 bits instruction which works as i64 BSWAP operation.  This
187     // instruction works fine as i32 BSWAP operation with an additional
188     // parameter.  Use isel patterns to lower BSWAP.
189     setOperationAction(ISD::BSWAP, IntVT, Legal);
190 
191     // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192     // operations.  Use isel patterns for i64, promote for i32.
193     LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194     setOperationAction(ISD::BITREVERSE, IntVT, Act);
195     setOperationAction(ISD::CTLZ, IntVT, Act);
196     setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
197     setOperationAction(ISD::CTPOP, IntVT, Act);
198 
199     // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.
200     // Use isel patterns for i64, promote for i32.
201     setOperationAction(ISD::AND, IntVT, Act);
202     setOperationAction(ISD::OR, IntVT, Act);
203     setOperationAction(ISD::XOR, IntVT, Act);
204   }
205   /// } Int Ops
206 
207   /// Conversion {
208   // VE doesn't have instructions for fp<->uint, so expand them by llvm
209   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
210   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
211   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
212   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
213 
214   // fp16 not supported
215   for (MVT FPVT : MVT::fp_valuetypes()) {
216     setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
217     setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
218   }
219   /// } Conversion
220 
221   /// Floating-point Ops {
222   /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
223   ///       and fcmp.
224 
225   // VE doesn't have following floating point operations.
226   for (MVT VT : MVT::fp_valuetypes()) {
227     setOperationAction(ISD::FNEG, VT, Expand);
228     setOperationAction(ISD::FREM, VT, Expand);
229   }
230 
231   // VE doesn't have fdiv of f128.
232   setOperationAction(ISD::FDIV, MVT::f128, Expand);
233 
234   for (MVT FPVT : {MVT::f32, MVT::f64}) {
235     // f32 and f64 uses ConstantFP.  f128 uses ConstantPool.
236     setOperationAction(ISD::ConstantFP, FPVT, Legal);
237   }
238   /// } Floating-point Ops
239 
240   /// Floating-point math functions {
241 
242   // VE doesn't have following floating point math functions.
243   for (MVT VT : MVT::fp_valuetypes()) {
244     setOperationAction(ISD::FABS, VT, Expand);
245     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
246     setOperationAction(ISD::FCOS, VT, Expand);
247     setOperationAction(ISD::FSIN, VT, Expand);
248     setOperationAction(ISD::FSQRT, VT, Expand);
249   }
250 
251   /// } Floating-point math functions
252 
253   /// Atomic instructions {
254 
255   setMaxAtomicSizeInBitsSupported(64);
256   setMinCmpXchgSizeInBits(32);
257   setSupportsUnalignedAtomics(false);
258 
259   // Use custom inserter for ATOMIC_FENCE.
260   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
261 
262   // Other atomic instructions.
263   for (MVT VT : MVT::integer_valuetypes()) {
264     // Support i8/i16 atomic swap.
265     setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
266 
267     // FIXME: Support "atmam" instructions.
268     setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
269     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
270     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
271     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
272 
273     // VE doesn't have follwing instructions.
274     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
275     setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
276     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
277     setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
278     setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
279     setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
280     setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
281     setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
282   }
283 
284   /// } Atomic instructions
285 
286   /// SJLJ instructions {
287   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
288   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
289   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
290   if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
291     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
292   /// } SJLJ instructions
293 
294   // Intrinsic instructions
295   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
296 }
297 
298 void VETargetLowering::initVPUActions() {
299   for (MVT LegalMaskVT : AllMaskVTs)
300     setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
301 
302   for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
303     setOperationAction(Opc, MVT::v512i1, Custom);
304 
305   for (MVT LegalVecVT : AllVectorVTs) {
306     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
307     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
308     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
309     // Translate all vector instructions with legal element types to VVP_*
310     // nodes.
311     // TODO We will custom-widen into VVP_* nodes in the future. While we are
312     // buildling the infrastructure for this, we only do this for legal vector
313     // VTs.
314 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
315   setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
316 #define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
317   setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
318 #include "VVPNodes.def"
319   }
320 
321   for (MVT LegalPackedVT : AllPackedVTs) {
322     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
323     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
324   }
325 
326   // vNt32, vNt64 ops (legal element types)
327   for (MVT VT : MVT::vector_valuetypes()) {
328     MVT ElemVT = VT.getVectorElementType();
329     unsigned ElemBits = ElemVT.getScalarSizeInBits();
330     if (ElemBits != 32 && ElemBits != 64)
331       continue;
332 
333     for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
334       setOperationAction(MemOpc, VT, Custom);
335   }
336 }
337 
338 SDValue
339 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
340                               bool IsVarArg,
341                               const SmallVectorImpl<ISD::OutputArg> &Outs,
342                               const SmallVectorImpl<SDValue> &OutVals,
343                               const SDLoc &DL, SelectionDAG &DAG) const {
344   // CCValAssign - represent the assignment of the return value to locations.
345   SmallVector<CCValAssign, 16> RVLocs;
346 
347   // CCState - Info about the registers and stack slot.
348   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
349                  *DAG.getContext());
350 
351   // Analyze return values.
352   CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
353 
354   SDValue Flag;
355   SmallVector<SDValue, 4> RetOps(1, Chain);
356 
357   // Copy the result values into the output registers.
358   for (unsigned i = 0; i != RVLocs.size(); ++i) {
359     CCValAssign &VA = RVLocs[i];
360     assert(VA.isRegLoc() && "Can only return in registers!");
361     assert(!VA.needsCustom() && "Unexpected custom lowering");
362     SDValue OutVal = OutVals[i];
363 
364     // Integer return values must be sign or zero extended by the callee.
365     switch (VA.getLocInfo()) {
366     case CCValAssign::Full:
367       break;
368     case CCValAssign::SExt:
369       OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
370       break;
371     case CCValAssign::ZExt:
372       OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
373       break;
374     case CCValAssign::AExt:
375       OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
376       break;
377     case CCValAssign::BCvt: {
378       // Convert a float return value to i64 with padding.
379       //     63     31   0
380       //    +------+------+
381       //    | float|   0  |
382       //    +------+------+
383       assert(VA.getLocVT() == MVT::i64);
384       assert(VA.getValVT() == MVT::f32);
385       SDValue Undef = SDValue(
386           DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
387       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
388       OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
389                                           MVT::i64, Undef, OutVal, Sub_f32),
390                        0);
391       break;
392     }
393     default:
394       llvm_unreachable("Unknown loc info!");
395     }
396 
397     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
398 
399     // Guarantee that all emitted copies are stuck together with flags.
400     Flag = Chain.getValue(1);
401     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
402   }
403 
404   RetOps[0] = Chain; // Update chain.
405 
406   // Add the flag if we have it.
407   if (Flag.getNode())
408     RetOps.push_back(Flag);
409 
410   return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
411 }
412 
413 SDValue VETargetLowering::LowerFormalArguments(
414     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
415     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
416     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
417   MachineFunction &MF = DAG.getMachineFunction();
418 
419   // Get the base offset of the incoming arguments stack space.
420   unsigned ArgsBaseOffset = Subtarget->getRsaSize();
421   // Get the size of the preserved arguments area
422   unsigned ArgsPreserved = 64;
423 
424   // Analyze arguments according to CC_VE.
425   SmallVector<CCValAssign, 16> ArgLocs;
426   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
427                  *DAG.getContext());
428   // Allocate the preserved area first.
429   CCInfo.AllocateStack(ArgsPreserved, Align(8));
430   // We already allocated the preserved area, so the stack offset computed
431   // by CC_VE would be correct now.
432   CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
433 
434   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
435     CCValAssign &VA = ArgLocs[i];
436     assert(!VA.needsCustom() && "Unexpected custom lowering");
437     if (VA.isRegLoc()) {
438       // This argument is passed in a register.
439       // All integer register arguments are promoted by the caller to i64.
440 
441       // Create a virtual register for the promoted live-in value.
442       Register VReg =
443           MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
444       SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
445 
446       // The caller promoted the argument, so insert an Assert?ext SDNode so we
447       // won't promote the value again in this function.
448       switch (VA.getLocInfo()) {
449       case CCValAssign::SExt:
450         Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
451                           DAG.getValueType(VA.getValVT()));
452         break;
453       case CCValAssign::ZExt:
454         Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
455                           DAG.getValueType(VA.getValVT()));
456         break;
457       case CCValAssign::BCvt: {
458         // Extract a float argument from i64 with padding.
459         //     63     31   0
460         //    +------+------+
461         //    | float|   0  |
462         //    +------+------+
463         assert(VA.getLocVT() == MVT::i64);
464         assert(VA.getValVT() == MVT::f32);
465         SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
466         Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
467                                          MVT::f32, Arg, Sub_f32),
468                       0);
469         break;
470       }
471       default:
472         break;
473       }
474 
475       // Truncate the register down to the argument type.
476       if (VA.isExtInLoc())
477         Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
478 
479       InVals.push_back(Arg);
480       continue;
481     }
482 
483     // The registers are exhausted. This argument was passed on the stack.
484     assert(VA.isMemLoc());
485     // The CC_VE_Full/Half functions compute stack offsets relative to the
486     // beginning of the arguments area at %fp + the size of reserved area.
487     unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
488     unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
489 
490     // Adjust offset for a float argument by adding 4 since the argument is
491     // stored in 8 bytes buffer with offset like below.  LLVM generates
492     // 4 bytes load instruction, so need to adjust offset here.  This
493     // adjustment is required in only LowerFormalArguments.  In LowerCall,
494     // a float argument is converted to i64 first, and stored as 8 bytes
495     // data, which is required by ABI, so no need for adjustment.
496     //    0      4
497     //    +------+------+
498     //    | empty| float|
499     //    +------+------+
500     if (VA.getValVT() == MVT::f32)
501       Offset += 4;
502 
503     int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
504     InVals.push_back(
505         DAG.getLoad(VA.getValVT(), DL, Chain,
506                     DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
507                     MachinePointerInfo::getFixedStack(MF, FI)));
508   }
509 
510   if (!IsVarArg)
511     return Chain;
512 
513   // This function takes variable arguments, some of which may have been passed
514   // in registers %s0-%s8.
515   //
516   // The va_start intrinsic needs to know the offset to the first variable
517   // argument.
518   // TODO: need to calculate offset correctly once we support f128.
519   unsigned ArgOffset = ArgLocs.size() * 8;
520   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
521   // Skip the reserved area at the top of stack.
522   FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
523 
524   return Chain;
525 }
526 
527 // FIXME? Maybe this could be a TableGen attribute on some registers and
528 // this table could be generated automatically from RegInfo.
529 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
530                                              const MachineFunction &MF) const {
531   Register Reg = StringSwitch<Register>(RegName)
532                      .Case("sp", VE::SX11)    // Stack pointer
533                      .Case("fp", VE::SX9)     // Frame pointer
534                      .Case("sl", VE::SX8)     // Stack limit
535                      .Case("lr", VE::SX10)    // Link register
536                      .Case("tp", VE::SX14)    // Thread pointer
537                      .Case("outer", VE::SX12) // Outer regiser
538                      .Case("info", VE::SX17)  // Info area register
539                      .Case("got", VE::SX15)   // Global offset table register
540                      .Case("plt", VE::SX16) // Procedure linkage table register
541                      .Default(0);
542 
543   if (Reg)
544     return Reg;
545 
546   report_fatal_error("Invalid register name global variable");
547 }
548 
549 //===----------------------------------------------------------------------===//
550 // TargetLowering Implementation
551 //===----------------------------------------------------------------------===//
552 
553 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
554                                     SmallVectorImpl<SDValue> &InVals) const {
555   SelectionDAG &DAG = CLI.DAG;
556   SDLoc DL = CLI.DL;
557   SDValue Chain = CLI.Chain;
558   auto PtrVT = getPointerTy(DAG.getDataLayout());
559 
560   // VE target does not yet support tail call optimization.
561   CLI.IsTailCall = false;
562 
563   // Get the base offset of the outgoing arguments stack space.
564   unsigned ArgsBaseOffset = Subtarget->getRsaSize();
565   // Get the size of the preserved arguments area
566   unsigned ArgsPreserved = 8 * 8u;
567 
568   // Analyze operands of the call, assigning locations to each operand.
569   SmallVector<CCValAssign, 16> ArgLocs;
570   CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
571                  *DAG.getContext());
572   // Allocate the preserved area first.
573   CCInfo.AllocateStack(ArgsPreserved, Align(8));
574   // We already allocated the preserved area, so the stack offset computed
575   // by CC_VE would be correct now.
576   CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
577 
578   // VE requires to use both register and stack for varargs or no-prototyped
579   // functions.
580   bool UseBoth = CLI.IsVarArg;
581 
582   // Analyze operands again if it is required to store BOTH.
583   SmallVector<CCValAssign, 16> ArgLocs2;
584   CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
585                   ArgLocs2, *DAG.getContext());
586   if (UseBoth)
587     CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
588 
589   // Get the size of the outgoing arguments stack space requirement.
590   unsigned ArgsSize = CCInfo.getNextStackOffset();
591 
592   // Keep stack frames 16-byte aligned.
593   ArgsSize = alignTo(ArgsSize, 16);
594 
595   // Adjust the stack pointer to make room for the arguments.
596   // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
597   // with more than 6 arguments.
598   Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
599 
600   // Collect the set of registers to pass to the function and their values.
601   // This will be emitted as a sequence of CopyToReg nodes glued to the call
602   // instruction.
603   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
604 
605   // Collect chains from all the memory opeations that copy arguments to the
606   // stack. They must follow the stack pointer adjustment above and precede the
607   // call instruction itself.
608   SmallVector<SDValue, 8> MemOpChains;
609 
610   // VE needs to get address of callee function in a register
611   // So, prepare to copy it to SX12 here.
612 
613   // If the callee is a GlobalAddress node (quite common, every direct call is)
614   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
615   // Likewise ExternalSymbol -> TargetExternalSymbol.
616   SDValue Callee = CLI.Callee;
617 
618   bool IsPICCall = isPositionIndependent();
619 
620   // PC-relative references to external symbols should go through $stub.
621   // If so, we need to prepare GlobalBaseReg first.
622   const TargetMachine &TM = DAG.getTarget();
623   const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
624   const GlobalValue *GV = nullptr;
625   auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
626   if (CalleeG)
627     GV = CalleeG->getGlobal();
628   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
629   bool UsePlt = !Local;
630   MachineFunction &MF = DAG.getMachineFunction();
631 
632   // Turn GlobalAddress/ExternalSymbol node into a value node
633   // containing the address of them here.
634   if (CalleeG) {
635     if (IsPICCall) {
636       if (UsePlt)
637         Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
638       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
639       Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
640     } else {
641       Callee =
642           makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
643     }
644   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
645     if (IsPICCall) {
646       if (UsePlt)
647         Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
648       Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
649       Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
650     } else {
651       Callee =
652           makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
653     }
654   }
655 
656   RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
657 
658   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
659     CCValAssign &VA = ArgLocs[i];
660     SDValue Arg = CLI.OutVals[i];
661 
662     // Promote the value if needed.
663     switch (VA.getLocInfo()) {
664     default:
665       llvm_unreachable("Unknown location info!");
666     case CCValAssign::Full:
667       break;
668     case CCValAssign::SExt:
669       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
670       break;
671     case CCValAssign::ZExt:
672       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
673       break;
674     case CCValAssign::AExt:
675       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
676       break;
677     case CCValAssign::BCvt: {
678       // Convert a float argument to i64 with padding.
679       //     63     31   0
680       //    +------+------+
681       //    | float|   0  |
682       //    +------+------+
683       assert(VA.getLocVT() == MVT::i64);
684       assert(VA.getValVT() == MVT::f32);
685       SDValue Undef = SDValue(
686           DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
687       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
688       Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
689                                        MVT::i64, Undef, Arg, Sub_f32),
690                     0);
691       break;
692     }
693     }
694 
695     if (VA.isRegLoc()) {
696       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
697       if (!UseBoth)
698         continue;
699       VA = ArgLocs2[i];
700     }
701 
702     assert(VA.isMemLoc());
703 
704     // Create a store off the stack pointer for this argument.
705     SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
706     // The argument area starts at %fp/%sp + the size of reserved area.
707     SDValue PtrOff =
708         DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
709     PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
710     MemOpChains.push_back(
711         DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
712   }
713 
714   // Emit all stores, make sure they occur before the call.
715   if (!MemOpChains.empty())
716     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
717 
718   // Build a sequence of CopyToReg nodes glued together with token chain and
719   // glue operands which copy the outgoing args into registers. The InGlue is
720   // necessary since all emitted instructions must be stuck together in order
721   // to pass the live physical registers.
722   SDValue InGlue;
723   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
724     Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
725                              RegsToPass[i].second, InGlue);
726     InGlue = Chain.getValue(1);
727   }
728 
729   // Build the operands for the call instruction itself.
730   SmallVector<SDValue, 8> Ops;
731   Ops.push_back(Chain);
732   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
733     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
734                                   RegsToPass[i].second.getValueType()));
735 
736   // Add a register mask operand representing the call-preserved registers.
737   const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
738   const uint32_t *Mask =
739       TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
740   assert(Mask && "Missing call preserved mask for calling convention");
741   Ops.push_back(DAG.getRegisterMask(Mask));
742 
743   // Make sure the CopyToReg nodes are glued to the call instruction which
744   // consumes the registers.
745   if (InGlue.getNode())
746     Ops.push_back(InGlue);
747 
748   // Now the call itself.
749   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
750   Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
751   InGlue = Chain.getValue(1);
752 
753   // Revert the stack pointer immediately after the call.
754   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
755                              DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
756   InGlue = Chain.getValue(1);
757 
758   // Now extract the return values. This is more or less the same as
759   // LowerFormalArguments.
760 
761   // Assign locations to each value returned by this call.
762   SmallVector<CCValAssign, 16> RVLocs;
763   CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
764                  *DAG.getContext());
765 
766   // Set inreg flag manually for codegen generated library calls that
767   // return float.
768   if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
769     CLI.Ins[0].Flags.setInReg();
770 
771   RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
772 
773   // Copy all of the result registers out of their specified physreg.
774   for (unsigned i = 0; i != RVLocs.size(); ++i) {
775     CCValAssign &VA = RVLocs[i];
776     assert(!VA.needsCustom() && "Unexpected custom lowering");
777     Register Reg = VA.getLocReg();
778 
779     // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
780     // reside in the same register in the high and low bits. Reuse the
781     // CopyFromReg previous node to avoid duplicate copies.
782     SDValue RV;
783     if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
784       if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
785         RV = Chain.getValue(0);
786 
787     // But usually we'll create a new CopyFromReg for a different register.
788     if (!RV.getNode()) {
789       RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
790       Chain = RV.getValue(1);
791       InGlue = Chain.getValue(2);
792     }
793 
794     // The callee promoted the return value, so insert an Assert?ext SDNode so
795     // we won't promote the value again in this function.
796     switch (VA.getLocInfo()) {
797     case CCValAssign::SExt:
798       RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
799                        DAG.getValueType(VA.getValVT()));
800       break;
801     case CCValAssign::ZExt:
802       RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
803                        DAG.getValueType(VA.getValVT()));
804       break;
805     case CCValAssign::BCvt: {
806       // Extract a float return value from i64 with padding.
807       //     63     31   0
808       //    +------+------+
809       //    | float|   0  |
810       //    +------+------+
811       assert(VA.getLocVT() == MVT::i64);
812       assert(VA.getValVT() == MVT::f32);
813       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
814       RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
815                                       MVT::f32, RV, Sub_f32),
816                    0);
817       break;
818     }
819     default:
820       break;
821     }
822 
823     // Truncate the register down to the return value type.
824     if (VA.isExtInLoc())
825       RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
826 
827     InVals.push_back(RV);
828   }
829 
830   return Chain;
831 }
832 
833 bool VETargetLowering::isOffsetFoldingLegal(
834     const GlobalAddressSDNode *GA) const {
835   // VE uses 64 bit addressing, so we need multiple instructions to generate
836   // an address.  Folding address with offset increases the number of
837   // instructions, so that we disable it here.  Offsets will be folded in
838   // the DAG combine later if it worth to do so.
839   return false;
840 }
841 
842 /// isFPImmLegal - Returns true if the target can instruction select the
843 /// specified FP immediate natively. If false, the legalizer will
844 /// materialize the FP immediate as a load from a constant pool.
845 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
846                                     bool ForCodeSize) const {
847   return VT == MVT::f32 || VT == MVT::f64;
848 }
849 
850 /// Determine if the target supports unaligned memory accesses.
851 ///
852 /// This function returns true if the target allows unaligned memory accesses
853 /// of the specified type in the given address space. If true, it also returns
854 /// whether the unaligned memory access is "fast" in the last argument by
855 /// reference. This is used, for example, in situations where an array
856 /// copy/move/set is converted to a sequence of store operations. Its use
857 /// helps to ensure that such replacements don't generate code that causes an
858 /// alignment error (trap) on the target machine.
859 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
860                                                       unsigned AddrSpace,
861                                                       Align A,
862                                                       MachineMemOperand::Flags,
863                                                       bool *Fast) const {
864   if (Fast) {
865     // It's fast anytime on VE
866     *Fast = true;
867   }
868   return true;
869 }
870 
871 VETargetLowering::VETargetLowering(const TargetMachine &TM,
872                                    const VESubtarget &STI)
873     : TargetLowering(TM), Subtarget(&STI) {
874   // Instructions which use registers as conditionals examine all the
875   // bits (as does the pseudo SELECT_CC expansion). I don't think it
876   // matters much whether it's ZeroOrOneBooleanContent, or
877   // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
878   // former.
879   setBooleanContents(ZeroOrOneBooleanContent);
880   setBooleanVectorContents(ZeroOrOneBooleanContent);
881 
882   initRegisterClasses();
883   initSPUActions();
884   initVPUActions();
885 
886   setStackPointerRegisterToSaveRestore(VE::SX11);
887 
888   // We have target-specific dag combine patterns for the following nodes:
889   setTargetDAGCombine(ISD::TRUNCATE);
890 
891   // Set function alignment to 16 bytes
892   setMinFunctionAlignment(Align(16));
893 
894   // VE stores all argument by 8 bytes alignment
895   setMinStackArgumentAlignment(Align(8));
896 
897   computeRegisterProperties(Subtarget->getRegisterInfo());
898 }
899 
900 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
901 #define TARGET_NODE_CASE(NAME)                                                 \
902   case VEISD::NAME:                                                            \
903     return "VEISD::" #NAME;
904   switch ((VEISD::NodeType)Opcode) {
905   case VEISD::FIRST_NUMBER:
906     break;
907     TARGET_NODE_CASE(CALL)
908     TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
909     TARGET_NODE_CASE(EH_SJLJ_SETJMP)
910     TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
911     TARGET_NODE_CASE(GETFUNPLT)
912     TARGET_NODE_CASE(GETSTACKTOP)
913     TARGET_NODE_CASE(GETTLSADDR)
914     TARGET_NODE_CASE(GLOBAL_BASE_REG)
915     TARGET_NODE_CASE(Hi)
916     TARGET_NODE_CASE(Lo)
917     TARGET_NODE_CASE(MEMBARRIER)
918     TARGET_NODE_CASE(RET_FLAG)
919     TARGET_NODE_CASE(TS1AM)
920     TARGET_NODE_CASE(VEC_UNPACK_LO)
921     TARGET_NODE_CASE(VEC_UNPACK_HI)
922     TARGET_NODE_CASE(VEC_PACK)
923     TARGET_NODE_CASE(VEC_BROADCAST)
924     TARGET_NODE_CASE(REPL_I32)
925     TARGET_NODE_CASE(REPL_F32)
926 
927     TARGET_NODE_CASE(LEGALAVL)
928 
929     // Register the VVP_* SDNodes.
930 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
931 #include "VVPNodes.def"
932   }
933 #undef TARGET_NODE_CASE
934   return nullptr;
935 }
936 
937 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
938                                          EVT VT) const {
939   return MVT::i32;
940 }
941 
942 // Convert to a target node and set target flags.
943 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
944                                           SelectionDAG &DAG) const {
945   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
946     return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
947                                       GA->getValueType(0), GA->getOffset(), TF);
948 
949   if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
950     return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
951                                      0, TF);
952 
953   if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
954     return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
955                                      CP->getAlign(), CP->getOffset(), TF);
956 
957   if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
958     return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
959                                        TF);
960 
961   if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
962     return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
963 
964   llvm_unreachable("Unhandled address SDNode");
965 }
966 
967 // Split Op into high and low parts according to HiTF and LoTF.
968 // Return an ADD node combining the parts.
969 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
970                                        SelectionDAG &DAG) const {
971   SDLoc DL(Op);
972   EVT VT = Op.getValueType();
973   SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
974   SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
975   return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
976 }
977 
978 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
979 // or ExternalSymbol SDNode.
980 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
981   SDLoc DL(Op);
982   EVT PtrVT = Op.getValueType();
983 
984   // Handle PIC mode first. VE needs a got load for every variable!
985   if (isPositionIndependent()) {
986     auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
987 
988     if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
989         (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
990       // Create following instructions for local linkage PIC code.
991       //     lea %reg, label@gotoff_lo
992       //     and %reg, %reg, (32)0
993       //     lea.sl %reg, label@gotoff_hi(%reg, %got)
994       SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
995                                   VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
996       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
997       return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
998     }
999     // Create following instructions for not local linkage PIC code.
1000     //     lea %reg, label@got_lo
1001     //     and %reg, %reg, (32)0
1002     //     lea.sl %reg, label@got_hi(%reg)
1003     //     ld %reg, (%reg, %got)
1004     SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
1005                                 VEMCExpr::VK_VE_GOT_LO32, DAG);
1006     SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1007     SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1008     return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1009                        MachinePointerInfo::getGOT(DAG.getMachineFunction()));
1010   }
1011 
1012   // This is one of the absolute code models.
1013   switch (getTargetMachine().getCodeModel()) {
1014   default:
1015     llvm_unreachable("Unsupported absolute code model");
1016   case CodeModel::Small:
1017   case CodeModel::Medium:
1018   case CodeModel::Large:
1019     // abs64.
1020     return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1021   }
1022 }
1023 
1024 /// Custom Lower {
1025 
1026 // The mappings for emitLeading/TrailingFence for VE is designed by following
1027 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1028 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1029                                                 Instruction *Inst,
1030                                                 AtomicOrdering Ord) const {
1031   switch (Ord) {
1032   case AtomicOrdering::NotAtomic:
1033   case AtomicOrdering::Unordered:
1034     llvm_unreachable("Invalid fence: unordered/non-atomic");
1035   case AtomicOrdering::Monotonic:
1036   case AtomicOrdering::Acquire:
1037     return nullptr; // Nothing to do
1038   case AtomicOrdering::Release:
1039   case AtomicOrdering::AcquireRelease:
1040     return Builder.CreateFence(AtomicOrdering::Release);
1041   case AtomicOrdering::SequentiallyConsistent:
1042     if (!Inst->hasAtomicStore())
1043       return nullptr; // Nothing to do
1044     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1045   }
1046   llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1047 }
1048 
1049 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1050                                                  Instruction *Inst,
1051                                                  AtomicOrdering Ord) const {
1052   switch (Ord) {
1053   case AtomicOrdering::NotAtomic:
1054   case AtomicOrdering::Unordered:
1055     llvm_unreachable("Invalid fence: unordered/not-atomic");
1056   case AtomicOrdering::Monotonic:
1057   case AtomicOrdering::Release:
1058     return nullptr; // Nothing to do
1059   case AtomicOrdering::Acquire:
1060   case AtomicOrdering::AcquireRelease:
1061     return Builder.CreateFence(AtomicOrdering::Acquire);
1062   case AtomicOrdering::SequentiallyConsistent:
1063     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1064   }
1065   llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1066 }
1067 
1068 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1069                                             SelectionDAG &DAG) const {
1070   SDLoc DL(Op);
1071   AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
1072       cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
1073   SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
1074       cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
1075 
1076   // VE uses Release consistency, so need a fence instruction if it is a
1077   // cross-thread fence.
1078   if (FenceSSID == SyncScope::System) {
1079     switch (FenceOrdering) {
1080     case AtomicOrdering::NotAtomic:
1081     case AtomicOrdering::Unordered:
1082     case AtomicOrdering::Monotonic:
1083       // No need to generate fencem instruction here.
1084       break;
1085     case AtomicOrdering::Acquire:
1086       // Generate "fencem 2" as acquire fence.
1087       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1088                                         DAG.getTargetConstant(2, DL, MVT::i32),
1089                                         Op.getOperand(0)),
1090                      0);
1091     case AtomicOrdering::Release:
1092       // Generate "fencem 1" as release fence.
1093       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1094                                         DAG.getTargetConstant(1, DL, MVT::i32),
1095                                         Op.getOperand(0)),
1096                      0);
1097     case AtomicOrdering::AcquireRelease:
1098     case AtomicOrdering::SequentiallyConsistent:
1099       // Generate "fencem 3" as acq_rel and seq_cst fence.
1100       // FIXME: "fencem 3" doesn't wait for for PCIe deveices accesses,
1101       //        so  seq_cst may require more instruction for them.
1102       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1103                                         DAG.getTargetConstant(3, DL, MVT::i32),
1104                                         Op.getOperand(0)),
1105                      0);
1106     }
1107   }
1108 
1109   // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1110   return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1111 }
1112 
1113 TargetLowering::AtomicExpansionKind
1114 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1115   // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1116   if (AI->getOperation() == AtomicRMWInst::Xchg) {
1117     return AtomicExpansionKind::None;
1118   }
1119   // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1120 
1121   // Otherwise, expand it using compare and exchange instruction to not call
1122   // __sync_fetch_and_* functions.
1123   return AtomicExpansionKind::CmpXChg;
1124 }
1125 
1126 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1127                             SDValue &Bits) {
1128   SDLoc DL(Op);
1129   AtomicSDNode *N = cast<AtomicSDNode>(Op);
1130   SDValue Ptr = N->getOperand(1);
1131   SDValue Val = N->getOperand(2);
1132   EVT PtrVT = Ptr.getValueType();
1133   bool Byte = N->getMemoryVT() == MVT::i8;
1134   //   Remainder = AND Ptr, 3
1135   //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
1136   //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
1137   //   Bits = Remainder << 3
1138   //   NewVal = Val << Bits
1139   SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1140   SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1141   SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1142                       : DAG.getConstant(3, DL, MVT::i32);
1143   Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1144   Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1145   return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1146 }
1147 
1148 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1149                              SDValue Bits) {
1150   SDLoc DL(Op);
1151   EVT VT = Data.getValueType();
1152   bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1153   //   NewData = Data >> Bits
1154   //   Result = NewData & 0xff   ; If Byte is true (1 byte)
1155   //   Result = NewData & 0xffff ; If Byte is false (2 bytes)
1156 
1157   SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1158   return DAG.getNode(ISD::AND, DL, VT,
1159                      {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1160 }
1161 
1162 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1163                                            SelectionDAG &DAG) const {
1164   SDLoc DL(Op);
1165   AtomicSDNode *N = cast<AtomicSDNode>(Op);
1166 
1167   if (N->getMemoryVT() == MVT::i8) {
1168     // For i8, use "ts1am"
1169     //   Input:
1170     //     ATOMIC_SWAP Ptr, Val, Order
1171     //
1172     //   Output:
1173     //     Remainder = AND Ptr, 3
1174     //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
1175     //     Bits = Remainder << 3
1176     //     NewVal = Val << Bits
1177     //
1178     //     Aligned = AND Ptr, -4
1179     //     Data = TS1AM Aligned, Flag, NewVal
1180     //
1181     //     NewData = Data >> Bits
1182     //     Result = NewData & 0xff ; 1 byte result
1183     SDValue Flag;
1184     SDValue Bits;
1185     SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1186 
1187     SDValue Ptr = N->getOperand(1);
1188     SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1189                                   {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1190     SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1191                                   DAG.getVTList(Op.getNode()->getValueType(0),
1192                                                 Op.getNode()->getValueType(1)),
1193                                   {N->getChain(), Aligned, Flag, NewVal},
1194                                   N->getMemOperand());
1195 
1196     SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1197     SDValue Chain = TS1AM.getValue(1);
1198     return DAG.getMergeValues({Result, Chain}, DL);
1199   }
1200   if (N->getMemoryVT() == MVT::i16) {
1201     // For i16, use "ts1am"
1202     SDValue Flag;
1203     SDValue Bits;
1204     SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1205 
1206     SDValue Ptr = N->getOperand(1);
1207     SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1208                                   {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1209     SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1210                                   DAG.getVTList(Op.getNode()->getValueType(0),
1211                                                 Op.getNode()->getValueType(1)),
1212                                   {N->getChain(), Aligned, Flag, NewVal},
1213                                   N->getMemOperand());
1214 
1215     SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1216     SDValue Chain = TS1AM.getValue(1);
1217     return DAG.getMergeValues({Result, Chain}, DL);
1218   }
1219   // Otherwise, let llvm legalize it.
1220   return Op;
1221 }
1222 
1223 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1224                                              SelectionDAG &DAG) const {
1225   return makeAddress(Op, DAG);
1226 }
1227 
1228 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1229                                             SelectionDAG &DAG) const {
1230   return makeAddress(Op, DAG);
1231 }
1232 
1233 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1234                                             SelectionDAG &DAG) const {
1235   return makeAddress(Op, DAG);
1236 }
1237 
1238 SDValue
1239 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1240                                                 SelectionDAG &DAG) const {
1241   SDLoc DL(Op);
1242 
1243   // Generate the following code:
1244   //   t1: ch,glue = callseq_start t0, 0, 0
1245   //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1246   //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
1247   //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1248   SDValue Label = withTargetFlags(Op, 0, DAG);
1249   EVT PtrVT = Op.getValueType();
1250 
1251   // Lowering the machine isd will make sure everything is in the right
1252   // location.
1253   SDValue Chain = DAG.getEntryNode();
1254   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1255   const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1256       DAG.getMachineFunction(), CallingConv::C);
1257   Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1258   SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1259   Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1260   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
1261                              DAG.getIntPtrConstant(0, DL, true),
1262                              Chain.getValue(1), DL);
1263   Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1264 
1265   // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1266   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1267   MFI.setHasCalls(true);
1268 
1269   // Also generate code to prepare a GOT register if it is PIC.
1270   if (isPositionIndependent()) {
1271     MachineFunction &MF = DAG.getMachineFunction();
1272     Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1273   }
1274 
1275   return Chain;
1276 }
1277 
1278 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1279                                                 SelectionDAG &DAG) const {
1280   // The current implementation of nld (2.26) doesn't allow local exec model
1281   // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1282   // generate the general dynamic model code sequence.
1283   //
1284   // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1285   return lowerToTLSGeneralDynamicModel(Op, DAG);
1286 }
1287 
1288 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1289   return makeAddress(Op, DAG);
1290 }
1291 
1292 // Lower a f128 load into two f64 loads.
1293 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1294   SDLoc DL(Op);
1295   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1296   assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1297   unsigned Alignment = LdNode->getAlign().value();
1298   if (Alignment > 8)
1299     Alignment = 8;
1300 
1301   SDValue Lo64 =
1302       DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1303                   LdNode->getPointerInfo(), Alignment,
1304                   LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1305                                        : MachineMemOperand::MONone);
1306   EVT AddrVT = LdNode->getBasePtr().getValueType();
1307   SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1308                               DAG.getConstant(8, DL, AddrVT));
1309   SDValue Hi64 =
1310       DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1311                   LdNode->getPointerInfo(), Alignment,
1312                   LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1313                                        : MachineMemOperand::MONone);
1314 
1315   SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1316   SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1317 
1318   // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1319   SDNode *InFP128 =
1320       DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1321   InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1322                                SDValue(InFP128, 0), Hi64, SubRegEven);
1323   InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1324                                SDValue(InFP128, 0), Lo64, SubRegOdd);
1325   SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1326                           SDValue(Hi64.getNode(), 1)};
1327   SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1328   SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1329   return DAG.getMergeValues(Ops, DL);
1330 }
1331 
1332 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1333   LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1334 
1335   EVT MemVT = LdNode->getMemoryVT();
1336 
1337   // Dispatch to vector isel.
1338   if (MemVT.isVector() && !isMaskType(MemVT))
1339     return lowerToVVP(Op, DAG);
1340 
1341   SDValue BasePtr = LdNode->getBasePtr();
1342   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1343     // Do not expand store instruction with frame index here because of
1344     // dependency problems.  We expand it later in eliminateFrameIndex().
1345     return Op;
1346   }
1347 
1348   if (MemVT == MVT::f128)
1349     return lowerLoadF128(Op, DAG);
1350 
1351   return Op;
1352 }
1353 
1354 // Lower a f128 store into two f64 stores.
1355 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1356   SDLoc DL(Op);
1357   StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1358   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1359 
1360   SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1361   SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1362 
1363   SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1364                                     StNode->getValue(), SubRegEven);
1365   SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1366                                     StNode->getValue(), SubRegOdd);
1367 
1368   unsigned Alignment = StNode->getAlign().value();
1369   if (Alignment > 8)
1370     Alignment = 8;
1371 
1372   // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1373   SDValue OutChains[2];
1374   OutChains[0] =
1375       DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1376                    StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1377                    StNode->isVolatile() ? MachineMemOperand::MOVolatile
1378                                         : MachineMemOperand::MONone);
1379   EVT AddrVT = StNode->getBasePtr().getValueType();
1380   SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1381                               DAG.getConstant(8, DL, AddrVT));
1382   OutChains[1] =
1383       DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1384                    MachinePointerInfo(), Alignment,
1385                    StNode->isVolatile() ? MachineMemOperand::MOVolatile
1386                                         : MachineMemOperand::MONone);
1387   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1388 }
1389 
1390 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1391   StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1392   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1393 
1394     // always expand non-mask vector loads to VVP
1395   EVT MemVT = StNode->getMemoryVT();
1396   if (MemVT.isVector() && !isMaskType(MemVT))
1397     return lowerToVVP(Op, DAG);
1398 
1399   SDValue BasePtr = StNode->getBasePtr();
1400   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1401     // Do not expand store instruction with frame index here because of
1402     // dependency problems.  We expand it later in eliminateFrameIndex().
1403     return Op;
1404   }
1405 
1406   if (MemVT == MVT::f128)
1407     return lowerStoreF128(Op, DAG);
1408 
1409   // Otherwise, ask llvm to expand it.
1410   return SDValue();
1411 }
1412 
1413 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1414   MachineFunction &MF = DAG.getMachineFunction();
1415   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1416   auto PtrVT = getPointerTy(DAG.getDataLayout());
1417 
1418   // Need frame address to find the address of VarArgsFrameIndex.
1419   MF.getFrameInfo().setFrameAddressIsTaken(true);
1420 
1421   // vastart just stores the address of the VarArgsFrameIndex slot into the
1422   // memory location argument.
1423   SDLoc DL(Op);
1424   SDValue Offset =
1425       DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1426                   DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1427   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1428   return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1429                       MachinePointerInfo(SV));
1430 }
1431 
1432 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1433   SDNode *Node = Op.getNode();
1434   EVT VT = Node->getValueType(0);
1435   SDValue InChain = Node->getOperand(0);
1436   SDValue VAListPtr = Node->getOperand(1);
1437   EVT PtrVT = VAListPtr.getValueType();
1438   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1439   SDLoc DL(Node);
1440   SDValue VAList =
1441       DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1442   SDValue Chain = VAList.getValue(1);
1443   SDValue NextPtr;
1444 
1445   if (VT == MVT::f128) {
1446     // VE f128 values must be stored with 16 bytes alignment.  We doesn't
1447     // know the actual alignment of VAList, so we take alignment of it
1448     // dyanmically.
1449     int Align = 16;
1450     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1451                          DAG.getConstant(Align - 1, DL, PtrVT));
1452     VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1453                          DAG.getConstant(-Align, DL, PtrVT));
1454     // Increment the pointer, VAList, by 16 to the next vaarg.
1455     NextPtr =
1456         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1457   } else if (VT == MVT::f32) {
1458     // float --> need special handling like below.
1459     //    0      4
1460     //    +------+------+
1461     //    | empty| float|
1462     //    +------+------+
1463     // Increment the pointer, VAList, by 8 to the next vaarg.
1464     NextPtr =
1465         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1466     // Then, adjust VAList.
1467     unsigned InternalOffset = 4;
1468     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1469                          DAG.getConstant(InternalOffset, DL, PtrVT));
1470   } else {
1471     // Increment the pointer, VAList, by 8 to the next vaarg.
1472     NextPtr =
1473         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1474   }
1475 
1476   // Store the incremented VAList to the legalized pointer.
1477   InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1478 
1479   // Load the actual argument out of the pointer VAList.
1480   // We can't count on greater alignment than the word size.
1481   return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
1482                      std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
1483 }
1484 
1485 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1486                                                   SelectionDAG &DAG) const {
1487   // Generate following code.
1488   //   (void)__llvm_grow_stack(size);
1489   //   ret = GETSTACKTOP;        // pseudo instruction
1490   SDLoc DL(Op);
1491 
1492   // Get the inputs.
1493   SDNode *Node = Op.getNode();
1494   SDValue Chain = Op.getOperand(0);
1495   SDValue Size = Op.getOperand(1);
1496   MaybeAlign Alignment(Op.getConstantOperandVal(2));
1497   EVT VT = Node->getValueType(0);
1498 
1499   // Chain the dynamic stack allocation so that it doesn't modify the stack
1500   // pointer when other instructions are using the stack.
1501   Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1502 
1503   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1504   Align StackAlign = TFI.getStackAlign();
1505   bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1506 
1507   // Prepare arguments
1508   TargetLowering::ArgListTy Args;
1509   TargetLowering::ArgListEntry Entry;
1510   Entry.Node = Size;
1511   Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1512   Args.push_back(Entry);
1513   if (NeedsAlign) {
1514     Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1515     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1516     Args.push_back(Entry);
1517   }
1518   Type *RetTy = Type::getVoidTy(*DAG.getContext());
1519 
1520   EVT PtrVT = Op.getValueType();
1521   SDValue Callee;
1522   if (NeedsAlign) {
1523     Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1524   } else {
1525     Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1526   }
1527 
1528   TargetLowering::CallLoweringInfo CLI(DAG);
1529   CLI.setDebugLoc(DL)
1530       .setChain(Chain)
1531       .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1532       .setDiscardResult(true);
1533   std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1534   Chain = pair.second;
1535   SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1536   if (NeedsAlign) {
1537     Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1538                          DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1539     Result = DAG.getNode(ISD::AND, DL, VT, Result,
1540                          DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1541   }
1542   //  Chain = Result.getValue(1);
1543   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
1544                              DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
1545 
1546   SDValue Ops[2] = {Result, Chain};
1547   return DAG.getMergeValues(Ops, DL);
1548 }
1549 
1550 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1551                                                SelectionDAG &DAG) const {
1552   SDLoc DL(Op);
1553   return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1554                      Op.getOperand(1));
1555 }
1556 
1557 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1558                                               SelectionDAG &DAG) const {
1559   SDLoc DL(Op);
1560   return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1561                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1562                      Op.getOperand(1));
1563 }
1564 
1565 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1566                                                       SelectionDAG &DAG) const {
1567   SDLoc DL(Op);
1568   return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1569                      Op.getOperand(0));
1570 }
1571 
1572 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1573                               const VETargetLowering &TLI,
1574                               const VESubtarget *Subtarget) {
1575   SDLoc DL(Op);
1576   MachineFunction &MF = DAG.getMachineFunction();
1577   EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1578 
1579   MachineFrameInfo &MFI = MF.getFrameInfo();
1580   MFI.setFrameAddressIsTaken(true);
1581 
1582   unsigned Depth = Op.getConstantOperandVal(0);
1583   const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1584   Register FrameReg = RegInfo->getFrameRegister(MF);
1585   SDValue FrameAddr =
1586       DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1587   while (Depth--)
1588     FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1589                             FrameAddr, MachinePointerInfo());
1590   return FrameAddr;
1591 }
1592 
1593 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1594                                const VETargetLowering &TLI,
1595                                const VESubtarget *Subtarget) {
1596   MachineFunction &MF = DAG.getMachineFunction();
1597   MachineFrameInfo &MFI = MF.getFrameInfo();
1598   MFI.setReturnAddressIsTaken(true);
1599 
1600   if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1601     return SDValue();
1602 
1603   SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1604 
1605   SDLoc DL(Op);
1606   EVT VT = Op.getValueType();
1607   SDValue Offset = DAG.getConstant(8, DL, VT);
1608   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1609                      DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1610                      MachinePointerInfo());
1611 }
1612 
1613 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1614                                                   SelectionDAG &DAG) const {
1615   SDLoc DL(Op);
1616   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1617   switch (IntNo) {
1618   default: // Don't custom lower most intrinsics.
1619     return SDValue();
1620   case Intrinsic::eh_sjlj_lsda: {
1621     MachineFunction &MF = DAG.getMachineFunction();
1622     MVT VT = Op.getSimpleValueType();
1623     const VETargetMachine *TM =
1624         static_cast<const VETargetMachine *>(&DAG.getTarget());
1625 
1626     // Create GCC_except_tableXX string.  The real symbol for that will be
1627     // generated in EHStreamer::emitExceptionTable() later.  So, we just
1628     // borrow it's name here.
1629     TM->getStrList()->push_back(std::string(
1630         (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1631     SDValue Addr =
1632         DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1633     if (isPositionIndependent()) {
1634       Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1635                           VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1636       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1637       return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1638     }
1639     return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1640   }
1641   }
1642 }
1643 
1644 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1645   if (!isa<BuildVectorSDNode>(N))
1646     return false;
1647   const auto *BVN = cast<BuildVectorSDNode>(N);
1648 
1649   // Find first non-undef insertion.
1650   unsigned Idx;
1651   for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1652     auto ElemV = BVN->getOperand(Idx);
1653     if (!ElemV->isUndef())
1654       break;
1655   }
1656   // Catch the (hypothetical) all-undef case.
1657   if (Idx == BVN->getNumOperands())
1658     return false;
1659   // Remember insertion.
1660   UniqueIdx = Idx++;
1661   // Verify that all other insertions are undef.
1662   for (; Idx < BVN->getNumOperands(); ++Idx) {
1663     auto ElemV = BVN->getOperand(Idx);
1664     if (!ElemV->isUndef())
1665       return false;
1666   }
1667   return true;
1668 }
1669 
1670 static SDValue getSplatValue(SDNode *N) {
1671   if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1672     return BuildVec->getSplatValue();
1673   }
1674   return SDValue();
1675 }
1676 
1677 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1678                                             SelectionDAG &DAG) const {
1679   VECustomDAG CDAG(DAG, Op);
1680   MVT ResultVT = Op.getSimpleValueType();
1681 
1682   // If there is just one element, expand to INSERT_VECTOR_ELT.
1683   unsigned UniqueIdx;
1684   if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1685     SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1686     auto ElemV = Op->getOperand(UniqueIdx);
1687     SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1688     return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1689   }
1690 
1691   // Else emit a broadcast.
1692   if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1693     unsigned NumEls = ResultVT.getVectorNumElements();
1694     auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1695     return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1696   }
1697 
1698   // Expand
1699   return SDValue();
1700 }
1701 
1702 TargetLowering::LegalizeAction
1703 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1704   // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1705   // these operations (transform nodes such that their AVL parameter refers to
1706   // packs of 64bit, instead of number of elements.
1707 
1708   // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1709   // re-visit them.
1710   if (isPackingSupportOpcode(Op.getOpcode()))
1711     return Legal;
1712 
1713   // Custom lower to legalize AVL for packed mode.
1714   if (isVVPOrVEC(Op.getOpcode()))
1715     return Custom;
1716   return Legal;
1717 }
1718 
1719 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1720   LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
1721   unsigned Opcode = Op.getOpcode();
1722 
1723   /// Scalar isel.
1724   switch (Opcode) {
1725   case ISD::ATOMIC_FENCE:
1726     return lowerATOMIC_FENCE(Op, DAG);
1727   case ISD::ATOMIC_SWAP:
1728     return lowerATOMIC_SWAP(Op, DAG);
1729   case ISD::BlockAddress:
1730     return lowerBlockAddress(Op, DAG);
1731   case ISD::ConstantPool:
1732     return lowerConstantPool(Op, DAG);
1733   case ISD::DYNAMIC_STACKALLOC:
1734     return lowerDYNAMIC_STACKALLOC(Op, DAG);
1735   case ISD::EH_SJLJ_LONGJMP:
1736     return lowerEH_SJLJ_LONGJMP(Op, DAG);
1737   case ISD::EH_SJLJ_SETJMP:
1738     return lowerEH_SJLJ_SETJMP(Op, DAG);
1739   case ISD::EH_SJLJ_SETUP_DISPATCH:
1740     return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1741   case ISD::FRAMEADDR:
1742     return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1743   case ISD::GlobalAddress:
1744     return lowerGlobalAddress(Op, DAG);
1745   case ISD::GlobalTLSAddress:
1746     return lowerGlobalTLSAddress(Op, DAG);
1747   case ISD::INTRINSIC_WO_CHAIN:
1748     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1749   case ISD::JumpTable:
1750     return lowerJumpTable(Op, DAG);
1751   case ISD::LOAD:
1752     return lowerLOAD(Op, DAG);
1753   case ISD::RETURNADDR:
1754     return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1755   case ISD::BUILD_VECTOR:
1756     return lowerBUILD_VECTOR(Op, DAG);
1757   case ISD::STORE:
1758     return lowerSTORE(Op, DAG);
1759   case ISD::VASTART:
1760     return lowerVASTART(Op, DAG);
1761   case ISD::VAARG:
1762     return lowerVAARG(Op, DAG);
1763 
1764   case ISD::INSERT_VECTOR_ELT:
1765     return lowerINSERT_VECTOR_ELT(Op, DAG);
1766   case ISD::EXTRACT_VECTOR_ELT:
1767     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1768   }
1769 
1770   /// Vector isel.
1771   LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
1772   if (ISD::isVPOpcode(Opcode))
1773     return lowerToVVP(Op, DAG);
1774 
1775   switch (Opcode) {
1776   default:
1777     llvm_unreachable("Should not custom lower this!");
1778 
1779   // Legalize the AVL of this internal node.
1780   case VEISD::VEC_BROADCAST:
1781 #define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1782 #include "VVPNodes.def"
1783     // AVL already legalized.
1784     if (getAnnotatedNodeAVL(Op).second)
1785       return Op;
1786     return legalizeInternalVectorOp(Op, DAG);
1787 
1788     // Translate into a VEC_*/VVP_* layer operation.
1789   case ISD::MLOAD:
1790   case ISD::MSTORE:
1791 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1792 #include "VVPNodes.def"
1793     if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1794       return splitMaskArithmetic(Op, DAG);
1795     return lowerToVVP(Op, DAG);
1796   }
1797 }
1798 /// } Custom Lower
1799 
1800 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1801                                           SmallVectorImpl<SDValue> &Results,
1802                                           SelectionDAG &DAG) const {
1803   switch (N->getOpcode()) {
1804   case ISD::ATOMIC_SWAP:
1805     // Let LLVM expand atomic swap instruction through LowerOperation.
1806     return;
1807   default:
1808     LLVM_DEBUG(N->dumpr(&DAG));
1809     llvm_unreachable("Do not know how to custom type legalize this operation!");
1810   }
1811 }
1812 
1813 /// JumpTable for VE.
1814 ///
1815 ///   VE cannot generate relocatable symbol in jump table.  VE cannot
1816 ///   generate expressions using symbols in both text segment and data
1817 ///   segment like below.
1818 ///             .4byte  .LBB0_2-.LJTI0_0
1819 ///   So, we generate offset from the top of function like below as
1820 ///   a custom label.
1821 ///             .4byte  .LBB0_2-<function name>
1822 
1823 unsigned VETargetLowering::getJumpTableEncoding() const {
1824   // Use custom label for PIC.
1825   if (isPositionIndependent())
1826     return MachineJumpTableInfo::EK_Custom32;
1827 
1828   // Otherwise, use the normal jump table encoding heuristics.
1829   return TargetLowering::getJumpTableEncoding();
1830 }
1831 
1832 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1833     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1834     unsigned Uid, MCContext &Ctx) const {
1835   assert(isPositionIndependent());
1836 
1837   // Generate custom label for PIC like below.
1838   //    .4bytes  .LBB0_2-<function name>
1839   const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1840   MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1841   const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1842   return MCBinaryExpr::createSub(Value, Base, Ctx);
1843 }
1844 
1845 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1846                                                    SelectionDAG &DAG) const {
1847   assert(isPositionIndependent());
1848   SDLoc DL(Table);
1849   Function *Function = &DAG.getMachineFunction().getFunction();
1850   assert(Function != nullptr);
1851   auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
1852 
1853   // In the jump table, we have following values in PIC mode.
1854   //    .4bytes  .LBB0_2-<function name>
1855   // We need to add this value and the address of this function to generate
1856   // .LBB0_2 label correctly under PIC mode.  So, we want to generate following
1857   // instructions:
1858   //     lea %reg, fun@gotoff_lo
1859   //     and %reg, %reg, (32)0
1860   //     lea.sl %reg, fun@gotoff_hi(%reg, %got)
1861   // In order to do so, we need to genarate correctly marked DAG node using
1862   // makeHiLoPair.
1863   SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
1864   SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1865                               VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1866   SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
1867   return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
1868 }
1869 
1870 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
1871                                       MachineBasicBlock::iterator I,
1872                                       MachineBasicBlock *TargetBB,
1873                                       const DebugLoc &DL) const {
1874   MachineFunction *MF = MBB.getParent();
1875   MachineRegisterInfo &MRI = MF->getRegInfo();
1876   const VEInstrInfo *TII = Subtarget->getInstrInfo();
1877 
1878   const TargetRegisterClass *RC = &VE::I64RegClass;
1879   Register Tmp1 = MRI.createVirtualRegister(RC);
1880   Register Tmp2 = MRI.createVirtualRegister(RC);
1881   Register Result = MRI.createVirtualRegister(RC);
1882 
1883   if (isPositionIndependent()) {
1884     // Create following instructions for local linkage PIC code.
1885     //     lea %Tmp1, TargetBB@gotoff_lo
1886     //     and %Tmp2, %Tmp1, (32)0
1887     //     lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1888     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1889         .addImm(0)
1890         .addImm(0)
1891         .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
1892     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1893         .addReg(Tmp1, getKillRegState(true))
1894         .addImm(M0(32));
1895     BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
1896         .addReg(VE::SX15)
1897         .addReg(Tmp2, getKillRegState(true))
1898         .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
1899   } else {
1900     // Create following instructions for non-PIC code.
1901     //     lea     %Tmp1, TargetBB@lo
1902     //     and     %Tmp2, %Tmp1, (32)0
1903     //     lea.sl  %Result, TargetBB@hi(%Tmp2)
1904     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1905         .addImm(0)
1906         .addImm(0)
1907         .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
1908     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1909         .addReg(Tmp1, getKillRegState(true))
1910         .addImm(M0(32));
1911     BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
1912         .addReg(Tmp2, getKillRegState(true))
1913         .addImm(0)
1914         .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
1915   }
1916   return Result;
1917 }
1918 
1919 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
1920                                          MachineBasicBlock::iterator I,
1921                                          StringRef Symbol, const DebugLoc &DL,
1922                                          bool IsLocal = false,
1923                                          bool IsCall = false) const {
1924   MachineFunction *MF = MBB.getParent();
1925   MachineRegisterInfo &MRI = MF->getRegInfo();
1926   const VEInstrInfo *TII = Subtarget->getInstrInfo();
1927 
1928   const TargetRegisterClass *RC = &VE::I64RegClass;
1929   Register Result = MRI.createVirtualRegister(RC);
1930 
1931   if (isPositionIndependent()) {
1932     if (IsCall && !IsLocal) {
1933       // Create following instructions for non-local linkage PIC code function
1934       // calls.  These instructions uses IC and magic number -24, so we expand
1935       // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
1936       //     lea %Reg, Symbol@plt_lo(-24)
1937       //     and %Reg, %Reg, (32)0
1938       //     sic %s16
1939       //     lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
1940       BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
1941           .addExternalSymbol("abort");
1942     } else if (IsLocal) {
1943       Register Tmp1 = MRI.createVirtualRegister(RC);
1944       Register Tmp2 = MRI.createVirtualRegister(RC);
1945       // Create following instructions for local linkage PIC code.
1946       //     lea %Tmp1, Symbol@gotoff_lo
1947       //     and %Tmp2, %Tmp1, (32)0
1948       //     lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1949       BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1950           .addImm(0)
1951           .addImm(0)
1952           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
1953       BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1954           .addReg(Tmp1, getKillRegState(true))
1955           .addImm(M0(32));
1956       BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
1957           .addReg(VE::SX15)
1958           .addReg(Tmp2, getKillRegState(true))
1959           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
1960     } else {
1961       Register Tmp1 = MRI.createVirtualRegister(RC);
1962       Register Tmp2 = MRI.createVirtualRegister(RC);
1963       // Create following instructions for not local linkage PIC code.
1964       //     lea %Tmp1, Symbol@got_lo
1965       //     and %Tmp2, %Tmp1, (32)0
1966       //     lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1967       //     ld %Result, 0(%Tmp3)
1968       Register Tmp3 = MRI.createVirtualRegister(RC);
1969       BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1970           .addImm(0)
1971           .addImm(0)
1972           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
1973       BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1974           .addReg(Tmp1, getKillRegState(true))
1975           .addImm(M0(32));
1976       BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
1977           .addReg(VE::SX15)
1978           .addReg(Tmp2, getKillRegState(true))
1979           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
1980       BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
1981           .addReg(Tmp3, getKillRegState(true))
1982           .addImm(0)
1983           .addImm(0);
1984     }
1985   } else {
1986     Register Tmp1 = MRI.createVirtualRegister(RC);
1987     Register Tmp2 = MRI.createVirtualRegister(RC);
1988     // Create following instructions for non-PIC code.
1989     //     lea     %Tmp1, Symbol@lo
1990     //     and     %Tmp2, %Tmp1, (32)0
1991     //     lea.sl  %Result, Symbol@hi(%Tmp2)
1992     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1993         .addImm(0)
1994         .addImm(0)
1995         .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
1996     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1997         .addReg(Tmp1, getKillRegState(true))
1998         .addImm(M0(32));
1999     BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2000         .addReg(Tmp2, getKillRegState(true))
2001         .addImm(0)
2002         .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2003   }
2004   return Result;
2005 }
2006 
2007 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2008                                               MachineBasicBlock *MBB,
2009                                               MachineBasicBlock *DispatchBB,
2010                                               int FI, int Offset) const {
2011   DebugLoc DL = MI.getDebugLoc();
2012   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2013 
2014   Register LabelReg =
2015       prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2016 
2017   // Store an address of DispatchBB to a given jmpbuf[1] where has next IC
2018   // referenced by longjmp (throw) later.
2019   MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2020   addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2021   MIB.addReg(LabelReg, getKillRegState(true));
2022 }
2023 
2024 MachineBasicBlock *
2025 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2026                                    MachineBasicBlock *MBB) const {
2027   DebugLoc DL = MI.getDebugLoc();
2028   MachineFunction *MF = MBB->getParent();
2029   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2030   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2031   MachineRegisterInfo &MRI = MF->getRegInfo();
2032 
2033   const BasicBlock *BB = MBB->getBasicBlock();
2034   MachineFunction::iterator I = ++MBB->getIterator();
2035 
2036   // Memory Reference.
2037   SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2038                                            MI.memoperands_end());
2039   Register BufReg = MI.getOperand(1).getReg();
2040 
2041   Register DstReg;
2042 
2043   DstReg = MI.getOperand(0).getReg();
2044   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2045   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2046   (void)TRI;
2047   Register MainDestReg = MRI.createVirtualRegister(RC);
2048   Register RestoreDestReg = MRI.createVirtualRegister(RC);
2049 
2050   // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2051   // instructions.  SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2052   //
2053   // ThisMBB:
2054   //   buf[3] = %s17 iff %s17 is used as BP
2055   //   buf[1] = RestoreMBB as IC after longjmp
2056   //   # SjLjSetup RestoreMBB
2057   //
2058   // MainMBB:
2059   //   v_main = 0
2060   //
2061   // SinkMBB:
2062   //   v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2063   //   ...
2064   //
2065   // RestoreMBB:
2066   //   %s17 = buf[3] = iff %s17 is used as BP
2067   //   v_restore = 1
2068   //   goto SinkMBB
2069 
2070   MachineBasicBlock *ThisMBB = MBB;
2071   MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2072   MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2073   MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2074   MF->insert(I, MainMBB);
2075   MF->insert(I, SinkMBB);
2076   MF->push_back(RestoreMBB);
2077   RestoreMBB->setHasAddressTaken();
2078 
2079   // Transfer the remainder of BB and its successor edges to SinkMBB.
2080   SinkMBB->splice(SinkMBB->begin(), MBB,
2081                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2082   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2083 
2084   // ThisMBB:
2085   Register LabelReg =
2086       prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2087 
2088   // Store BP in buf[3] iff this function is using BP.
2089   const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2090   if (TFI->hasBP(*MF)) {
2091     MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2092     MIB.addReg(BufReg);
2093     MIB.addImm(0);
2094     MIB.addImm(24);
2095     MIB.addReg(VE::SX17);
2096     MIB.setMemRefs(MMOs);
2097   }
2098 
2099   // Store IP in buf[1].
2100   MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2101   MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2102   MIB.addImm(0);
2103   MIB.addImm(8);
2104   MIB.addReg(LabelReg, getKillRegState(true));
2105   MIB.setMemRefs(MMOs);
2106 
2107   // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2108 
2109   // Insert setup.
2110   MIB =
2111       BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2112 
2113   const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2114   MIB.addRegMask(RegInfo->getNoPreservedMask());
2115   ThisMBB->addSuccessor(MainMBB);
2116   ThisMBB->addSuccessor(RestoreMBB);
2117 
2118   // MainMBB:
2119   BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2120       .addImm(0)
2121       .addImm(0)
2122       .addImm(0);
2123   MainMBB->addSuccessor(SinkMBB);
2124 
2125   // SinkMBB:
2126   BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2127       .addReg(MainDestReg)
2128       .addMBB(MainMBB)
2129       .addReg(RestoreDestReg)
2130       .addMBB(RestoreMBB);
2131 
2132   // RestoreMBB:
2133   // Restore BP from buf[3] iff this function is using BP.  The address of
2134   // buf is in SX10.
2135   // FIXME: Better to not use SX10 here
2136   if (TFI->hasBP(*MF)) {
2137     MachineInstrBuilder MIB =
2138         BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2139     MIB.addReg(VE::SX10);
2140     MIB.addImm(0);
2141     MIB.addImm(24);
2142     MIB.setMemRefs(MMOs);
2143   }
2144   BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2145       .addImm(0)
2146       .addImm(0)
2147       .addImm(1);
2148   BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2149   RestoreMBB->addSuccessor(SinkMBB);
2150 
2151   MI.eraseFromParent();
2152   return SinkMBB;
2153 }
2154 
2155 MachineBasicBlock *
2156 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2157                                     MachineBasicBlock *MBB) const {
2158   DebugLoc DL = MI.getDebugLoc();
2159   MachineFunction *MF = MBB->getParent();
2160   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2161   MachineRegisterInfo &MRI = MF->getRegInfo();
2162 
2163   // Memory Reference.
2164   SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2165                                            MI.memoperands_end());
2166   Register BufReg = MI.getOperand(0).getReg();
2167 
2168   Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2169   // Since FP is only updated here but NOT referenced, it's treated as GPR.
2170   Register FP = VE::SX9;
2171   Register SP = VE::SX11;
2172 
2173   MachineInstrBuilder MIB;
2174 
2175   MachineBasicBlock *ThisMBB = MBB;
2176 
2177   // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2178   //
2179   // ThisMBB:
2180   //   %fp = load buf[0]
2181   //   %jmp = load buf[1]
2182   //   %s10 = buf        ; Store an address of buf to SX10 for RestoreMBB
2183   //   %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2184   //   jmp %jmp
2185 
2186   // Reload FP.
2187   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2188   MIB.addReg(BufReg);
2189   MIB.addImm(0);
2190   MIB.addImm(0);
2191   MIB.setMemRefs(MMOs);
2192 
2193   // Reload IP.
2194   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2195   MIB.addReg(BufReg);
2196   MIB.addImm(0);
2197   MIB.addImm(8);
2198   MIB.setMemRefs(MMOs);
2199 
2200   // Copy BufReg to SX10 for later use in setjmp.
2201   // FIXME: Better to not use SX10 here
2202   BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2203       .addReg(BufReg)
2204       .addImm(0);
2205 
2206   // Reload SP.
2207   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2208   MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2209   MIB.addImm(0);
2210   MIB.addImm(16);
2211   MIB.setMemRefs(MMOs);
2212 
2213   // Jump.
2214   BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2215       .addReg(Tmp, getKillRegState(true))
2216       .addImm(0);
2217 
2218   MI.eraseFromParent();
2219   return ThisMBB;
2220 }
2221 
2222 MachineBasicBlock *
2223 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2224                                         MachineBasicBlock *BB) const {
2225   DebugLoc DL = MI.getDebugLoc();
2226   MachineFunction *MF = BB->getParent();
2227   MachineFrameInfo &MFI = MF->getFrameInfo();
2228   MachineRegisterInfo &MRI = MF->getRegInfo();
2229   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2230   int FI = MFI.getFunctionContextIndex();
2231 
2232   // Get a mapping of the call site numbers to all of the landing pads they're
2233   // associated with.
2234   DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2235   unsigned MaxCSNum = 0;
2236   for (auto &MBB : *MF) {
2237     if (!MBB.isEHPad())
2238       continue;
2239 
2240     MCSymbol *Sym = nullptr;
2241     for (const auto &MI : MBB) {
2242       if (MI.isDebugInstr())
2243         continue;
2244 
2245       assert(MI.isEHLabel() && "expected EH_LABEL");
2246       Sym = MI.getOperand(0).getMCSymbol();
2247       break;
2248     }
2249 
2250     if (!MF->hasCallSiteLandingPad(Sym))
2251       continue;
2252 
2253     for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2254       CallSiteNumToLPad[CSI].push_back(&MBB);
2255       MaxCSNum = std::max(MaxCSNum, CSI);
2256     }
2257   }
2258 
2259   // Get an ordered list of the machine basic blocks for the jump table.
2260   std::vector<MachineBasicBlock *> LPadList;
2261   SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2262   LPadList.reserve(CallSiteNumToLPad.size());
2263 
2264   for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2265     for (auto &LP : CallSiteNumToLPad[CSI]) {
2266       LPadList.push_back(LP);
2267       InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2268     }
2269   }
2270 
2271   assert(!LPadList.empty() &&
2272          "No landing pad destinations for the dispatch jump table!");
2273 
2274   // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2275   //   %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2276   //
2277   // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2278   // First `i64` is callsite, so callsite is FI+8.
2279   static const int OffsetIC = 72;
2280   static const int OffsetCS = 8;
2281 
2282   // Create the MBBs for the dispatch code like following:
2283   //
2284   // ThisMBB:
2285   //   Prepare DispatchBB address and store it to buf[1].
2286   //   ...
2287   //
2288   // DispatchBB:
2289   //   %s15 = GETGOT iff isPositionIndependent
2290   //   %callsite = load callsite
2291   //   brgt.l.t #size of callsites, %callsite, DispContBB
2292   //
2293   // TrapBB:
2294   //   Call abort.
2295   //
2296   // DispContBB:
2297   //   %breg = address of jump table
2298   //   %pc = load and calculate next pc from %breg and %callsite
2299   //   jmp %pc
2300 
2301   // Shove the dispatch's address into the return slot in the function context.
2302   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2303   DispatchBB->setIsEHPad(true);
2304 
2305   // Trap BB will causes trap like `assert(0)`.
2306   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2307   DispatchBB->addSuccessor(TrapBB);
2308 
2309   MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2310   DispatchBB->addSuccessor(DispContBB);
2311 
2312   // Insert MBBs.
2313   MF->push_back(DispatchBB);
2314   MF->push_back(DispContBB);
2315   MF->push_back(TrapBB);
2316 
2317   // Insert code to call abort in the TrapBB.
2318   Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2319                                  /* Local */ false, /* Call */ true);
2320   BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2321       .addReg(Abort, getKillRegState(true))
2322       .addImm(0)
2323       .addImm(0);
2324 
2325   // Insert code into the entry block that creates and registers the function
2326   // context.
2327   setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2328 
2329   // Create the jump table and associated information
2330   unsigned JTE = getJumpTableEncoding();
2331   MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2332   unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2333 
2334   const VERegisterInfo &RI = TII->getRegisterInfo();
2335   // Add a register mask with no preserved registers.  This results in all
2336   // registers being marked as clobbered.
2337   BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2338       .addRegMask(RI.getNoPreservedMask());
2339 
2340   if (isPositionIndependent()) {
2341     // Force to generate GETGOT, since current implementation doesn't store GOT
2342     // register.
2343     BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2344   }
2345 
2346   // IReg is used as an index in a memory operand and therefore can't be SP
2347   const TargetRegisterClass *RC = &VE::I64RegClass;
2348   Register IReg = MRI.createVirtualRegister(RC);
2349   addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2350                     OffsetCS);
2351   if (LPadList.size() < 64) {
2352     BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2353         .addImm(VECC::CC_ILE)
2354         .addImm(LPadList.size())
2355         .addReg(IReg)
2356         .addMBB(TrapBB);
2357   } else {
2358     assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2359     Register TmpReg = MRI.createVirtualRegister(RC);
2360     BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2361         .addImm(0)
2362         .addImm(0)
2363         .addImm(LPadList.size());
2364     BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2365         .addImm(VECC::CC_ILE)
2366         .addReg(TmpReg, getKillRegState(true))
2367         .addReg(IReg)
2368         .addMBB(TrapBB);
2369   }
2370 
2371   Register BReg = MRI.createVirtualRegister(RC);
2372   Register Tmp1 = MRI.createVirtualRegister(RC);
2373   Register Tmp2 = MRI.createVirtualRegister(RC);
2374 
2375   if (isPositionIndependent()) {
2376     // Create following instructions for local linkage PIC code.
2377     //     lea    %Tmp1, .LJTI0_0@gotoff_lo
2378     //     and    %Tmp2, %Tmp1, (32)0
2379     //     lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2380     BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2381         .addImm(0)
2382         .addImm(0)
2383         .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2384     BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2385         .addReg(Tmp1, getKillRegState(true))
2386         .addImm(M0(32));
2387     BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2388         .addReg(VE::SX15)
2389         .addReg(Tmp2, getKillRegState(true))
2390         .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2391   } else {
2392     // Create following instructions for non-PIC code.
2393     //     lea     %Tmp1, .LJTI0_0@lo
2394     //     and     %Tmp2, %Tmp1, (32)0
2395     //     lea.sl  %BReg, .LJTI0_0@hi(%Tmp2)
2396     BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2397         .addImm(0)
2398         .addImm(0)
2399         .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2400     BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2401         .addReg(Tmp1, getKillRegState(true))
2402         .addImm(M0(32));
2403     BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2404         .addReg(Tmp2, getKillRegState(true))
2405         .addImm(0)
2406         .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2407   }
2408 
2409   switch (JTE) {
2410   case MachineJumpTableInfo::EK_BlockAddress: {
2411     // Generate simple block address code for no-PIC model.
2412     //     sll %Tmp1, %IReg, 3
2413     //     lds %TReg, 0(%Tmp1, %BReg)
2414     //     bcfla %TReg
2415 
2416     Register TReg = MRI.createVirtualRegister(RC);
2417     Register Tmp1 = MRI.createVirtualRegister(RC);
2418 
2419     BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2420         .addReg(IReg, getKillRegState(true))
2421         .addImm(3);
2422     BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2423         .addReg(BReg, getKillRegState(true))
2424         .addReg(Tmp1, getKillRegState(true))
2425         .addImm(0);
2426     BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2427         .addReg(TReg, getKillRegState(true))
2428         .addImm(0);
2429     break;
2430   }
2431   case MachineJumpTableInfo::EK_Custom32: {
2432     // Generate block address code using differences from the function pointer
2433     // for PIC model.
2434     //     sll %Tmp1, %IReg, 2
2435     //     ldl.zx %OReg, 0(%Tmp1, %BReg)
2436     //     Prepare function address in BReg2.
2437     //     adds.l %TReg, %BReg2, %OReg
2438     //     bcfla %TReg
2439 
2440     assert(isPositionIndependent());
2441     Register OReg = MRI.createVirtualRegister(RC);
2442     Register TReg = MRI.createVirtualRegister(RC);
2443     Register Tmp1 = MRI.createVirtualRegister(RC);
2444 
2445     BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2446         .addReg(IReg, getKillRegState(true))
2447         .addImm(2);
2448     BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2449         .addReg(BReg, getKillRegState(true))
2450         .addReg(Tmp1, getKillRegState(true))
2451         .addImm(0);
2452     Register BReg2 =
2453         prepareSymbol(*DispContBB, DispContBB->end(),
2454                       DispContBB->getParent()->getName(), DL, /* Local */ true);
2455     BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2456         .addReg(OReg, getKillRegState(true))
2457         .addReg(BReg2, getKillRegState(true));
2458     BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2459         .addReg(TReg, getKillRegState(true))
2460         .addImm(0);
2461     break;
2462   }
2463   default:
2464     llvm_unreachable("Unexpected jump table encoding");
2465   }
2466 
2467   // Add the jump table entries as successors to the MBB.
2468   SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2469   for (auto &LP : LPadList)
2470     if (SeenMBBs.insert(LP).second)
2471       DispContBB->addSuccessor(LP);
2472 
2473   // N.B. the order the invoke BBs are processed in doesn't matter here.
2474   SmallVector<MachineBasicBlock *, 64> MBBLPads;
2475   const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2476   for (MachineBasicBlock *MBB : InvokeBBs) {
2477     // Remove the landing pad successor from the invoke block and replace it
2478     // with the new dispatch block.
2479     // Keep a copy of Successors since it's modified inside the loop.
2480     SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2481                                                    MBB->succ_rend());
2482     // FIXME: Avoid quadratic complexity.
2483     for (auto MBBS : Successors) {
2484       if (MBBS->isEHPad()) {
2485         MBB->removeSuccessor(MBBS);
2486         MBBLPads.push_back(MBBS);
2487       }
2488     }
2489 
2490     MBB->addSuccessor(DispatchBB);
2491 
2492     // Find the invoke call and mark all of the callee-saved registers as
2493     // 'implicit defined' so that they're spilled.  This prevents code from
2494     // moving instructions to before the EH block, where they will never be
2495     // executed.
2496     for (auto &II : reverse(*MBB)) {
2497       if (!II.isCall())
2498         continue;
2499 
2500       DenseMap<Register, bool> DefRegs;
2501       for (auto &MOp : II.operands())
2502         if (MOp.isReg())
2503           DefRegs[MOp.getReg()] = true;
2504 
2505       MachineInstrBuilder MIB(*MF, &II);
2506       for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2507         Register Reg = SavedRegs[RI];
2508         if (!DefRegs[Reg])
2509           MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2510       }
2511 
2512       break;
2513     }
2514   }
2515 
2516   // Mark all former landing pads as non-landing pads.  The dispatch is the only
2517   // landing pad now.
2518   for (auto &LP : MBBLPads)
2519     LP->setIsEHPad(false);
2520 
2521   // The instruction is gone now.
2522   MI.eraseFromParent();
2523   return BB;
2524 }
2525 
2526 MachineBasicBlock *
2527 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2528                                               MachineBasicBlock *BB) const {
2529   switch (MI.getOpcode()) {
2530   default:
2531     llvm_unreachable("Unknown Custom Instruction!");
2532   case VE::EH_SjLj_LongJmp:
2533     return emitEHSjLjLongJmp(MI, BB);
2534   case VE::EH_SjLj_SetJmp:
2535     return emitEHSjLjSetJmp(MI, BB);
2536   case VE::EH_SjLj_Setup_Dispatch:
2537     return emitSjLjDispatchBlock(MI, BB);
2538   }
2539 }
2540 
2541 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2542   switch (User->getOpcode()) {
2543   default:
2544     return false;
2545   case ISD::ADD:
2546   case ISD::SUB:
2547   case ISD::MUL:
2548   case ISD::SDIV:
2549   case ISD::UDIV:
2550   case ISD::SETCC:
2551   case ISD::SMIN:
2552   case ISD::SMAX:
2553   case ISD::SHL:
2554   case ISD::SRA:
2555   case ISD::BSWAP:
2556   case ISD::SINT_TO_FP:
2557   case ISD::UINT_TO_FP:
2558   case ISD::BR_CC:
2559   case ISD::BITCAST:
2560   case ISD::ATOMIC_CMP_SWAP:
2561   case ISD::ATOMIC_SWAP:
2562     return true;
2563   case ISD::SRL:
2564     if (N->getOperand(0).getOpcode() != ISD::SRL)
2565       return true;
2566     // (srl (trunc (srl ...))) may be optimized by combining srl, so
2567     // doesn't optimize trunc now.
2568     return false;
2569   case ISD::SELECT_CC:
2570     if (User->getOperand(2).getNode() != N &&
2571         User->getOperand(3).getNode() != N)
2572       return true;
2573     LLVM_FALLTHROUGH;
2574   case ISD::AND:
2575   case ISD::OR:
2576   case ISD::XOR:
2577   case ISD::SELECT:
2578   case ISD::CopyToReg:
2579     // Check all use of selections, bit operations, and copies.  If all of them
2580     // are safe, optimize truncate to extract_subreg.
2581     for (const SDNode *U : User->uses()) {
2582       switch (U->getOpcode()) {
2583       default:
2584         // If the use is an instruction which treats the source operand as i32,
2585         // it is safe to avoid truncate here.
2586         if (isI32Insn(U, N))
2587           continue;
2588         break;
2589       case ISD::ANY_EXTEND:
2590       case ISD::SIGN_EXTEND:
2591       case ISD::ZERO_EXTEND: {
2592         // Special optimizations to the combination of ext and trunc.
2593         // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2594         // since this truncate instruction clears higher 32 bits which is filled
2595         // by one of ext instructions later.
2596         assert(N->getValueType(0) == MVT::i32 &&
2597                "find truncate to not i32 integer");
2598         if (User->getOpcode() == ISD::SELECT_CC ||
2599             User->getOpcode() == ISD::SELECT)
2600           continue;
2601         break;
2602       }
2603       }
2604       return false;
2605     }
2606     return true;
2607   }
2608 }
2609 
2610 // Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
2611 // sometime too early.  Optimizing it in DAG pattern matching in VEInstrInfo.td
2612 // is sometime too late.  So, doing it at here.
2613 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2614                                           DAGCombinerInfo &DCI) const {
2615   assert(N->getOpcode() == ISD::TRUNCATE &&
2616          "Should be called with a TRUNCATE node");
2617 
2618   SelectionDAG &DAG = DCI.DAG;
2619   SDLoc DL(N);
2620   EVT VT = N->getValueType(0);
2621 
2622   // We prefer to do this when all types are legal.
2623   if (!DCI.isAfterLegalizeDAG())
2624     return SDValue();
2625 
2626   // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
2627   if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
2628       isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
2629       isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
2630     return SDValue();
2631 
2632   // Check all use of this TRUNCATE.
2633   for (const SDNode *User : N->uses()) {
2634     // Make sure that we're not going to replace TRUNCATE for non i32
2635     // instructions.
2636     //
2637     // FIXME: Although we could sometimes handle this, and it does occur in
2638     // practice that one of the condition inputs to the select is also one of
2639     // the outputs, we currently can't deal with this.
2640     if (isI32Insn(User, N))
2641       continue;
2642 
2643     return SDValue();
2644   }
2645 
2646   SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2647   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
2648                                     N->getOperand(0), SubI32),
2649                  0);
2650 }
2651 
2652 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
2653                                             DAGCombinerInfo &DCI) const {
2654   switch (N->getOpcode()) {
2655   default:
2656     break;
2657   case ISD::TRUNCATE:
2658     return combineTRUNCATE(N, DCI);
2659   }
2660 
2661   return SDValue();
2662 }
2663 
2664 //===----------------------------------------------------------------------===//
2665 // VE Inline Assembly Support
2666 //===----------------------------------------------------------------------===//
2667 
2668 VETargetLowering::ConstraintType
2669 VETargetLowering::getConstraintType(StringRef Constraint) const {
2670   if (Constraint.size() == 1) {
2671     switch (Constraint[0]) {
2672     default:
2673       break;
2674     case 'v': // vector registers
2675       return C_RegisterClass;
2676     }
2677   }
2678   return TargetLowering::getConstraintType(Constraint);
2679 }
2680 
2681 std::pair<unsigned, const TargetRegisterClass *>
2682 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2683                                                StringRef Constraint,
2684                                                MVT VT) const {
2685   const TargetRegisterClass *RC = nullptr;
2686   if (Constraint.size() == 1) {
2687     switch (Constraint[0]) {
2688     default:
2689       return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2690     case 'r':
2691       RC = &VE::I64RegClass;
2692       break;
2693     case 'v':
2694       RC = &VE::V64RegClass;
2695       break;
2696     }
2697     return std::make_pair(0U, RC);
2698   }
2699 
2700   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2701 }
2702 
2703 //===----------------------------------------------------------------------===//
2704 // VE Target Optimization Support
2705 //===----------------------------------------------------------------------===//
2706 
2707 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
2708   // Specify 8 for PIC model to relieve the impact of PIC load instructions.
2709   if (isJumpTableRelative())
2710     return 8;
2711 
2712   return TargetLowering::getMinimumJumpTableEntries();
2713 }
2714 
2715 bool VETargetLowering::hasAndNot(SDValue Y) const {
2716   EVT VT = Y.getValueType();
2717 
2718   // VE doesn't have vector and not instruction.
2719   if (VT.isVector())
2720     return false;
2721 
2722   // VE allows different immediate values for X and Y where ~X & Y.
2723   // Only simm7 works for X, and only mimm works for Y on VE.  However, this
2724   // function is used to check whether an immediate value is OK for and-not
2725   // instruction as both X and Y.  Generating additional instruction to
2726   // retrieve an immediate value is no good since the purpose of this
2727   // function is to convert a series of 3 instructions to another series of
2728   // 3 instructions with better parallelism.  Therefore, we return false
2729   // for all immediate values now.
2730   // FIXME: Change hasAndNot function to have two operands to make it work
2731   //        correctly with Aurora VE.
2732   if (isa<ConstantSDNode>(Y))
2733     return false;
2734 
2735   // It's ok for generic registers.
2736   return true;
2737 }
2738 
2739 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2740                                                   SelectionDAG &DAG) const {
2741   assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
2742   MVT VT = Op.getOperand(0).getSimpleValueType();
2743 
2744   // Special treatment for packed V64 types.
2745   assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2746   (void)VT;
2747   // Example of codes:
2748   //   %packed_v = extractelt %vr, %idx / 2
2749   //   %v = %packed_v >> (%idx % 2 * 32)
2750   //   %res = %v & 0xffffffff
2751 
2752   SDValue Vec = Op.getOperand(0);
2753   SDValue Idx = Op.getOperand(1);
2754   SDLoc DL(Op);
2755   SDValue Result = Op;
2756   if (false /* Idx->isConstant() */) {
2757     // TODO: optimized implementation using constant values
2758   } else {
2759     SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2760     SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2761     SDValue PackedElt =
2762         SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2763     SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2764     SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2765     SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2766     Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2767     PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
2768     SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
2769     PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2770     SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2771     Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
2772                                         MVT::i32, PackedElt, SubI32),
2773                      0);
2774 
2775     if (Op.getSimpleValueType() == MVT::f32) {
2776       Result = DAG.getBitcast(MVT::f32, Result);
2777     } else {
2778       assert(Op.getSimpleValueType() == MVT::i32);
2779     }
2780   }
2781   return Result;
2782 }
2783 
2784 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2785                                                  SelectionDAG &DAG) const {
2786   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
2787   MVT VT = Op.getOperand(0).getSimpleValueType();
2788 
2789   // Special treatment for packed V64 types.
2790   assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2791   (void)VT;
2792   // The v512i32 and v512f32 starts from upper bits (0..31).  This "upper
2793   // bits" required `val << 32` from C implementation's point of view.
2794   //
2795   // Example of codes:
2796   //   %packed_elt = extractelt %vr, (%idx >> 1)
2797   //   %shift = ((%idx & 1) ^ 1) << 5
2798   //   %packed_elt &= 0xffffffff00000000 >> shift
2799   //   %packed_elt |= (zext %val) << shift
2800   //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)
2801 
2802   SDLoc DL(Op);
2803   SDValue Vec = Op.getOperand(0);
2804   SDValue Val = Op.getOperand(1);
2805   SDValue Idx = Op.getOperand(2);
2806   if (Idx.getSimpleValueType() == MVT::i32)
2807     Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
2808   if (Val.getSimpleValueType() == MVT::f32)
2809     Val = DAG.getBitcast(MVT::i32, Val);
2810   assert(Val.getSimpleValueType() == MVT::i32);
2811   Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
2812 
2813   SDValue Result = Op;
2814   if (false /* Idx->isConstant()*/) {
2815     // TODO: optimized implementation using constant values
2816   } else {
2817     SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2818     SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2819     SDValue PackedElt =
2820         SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2821     SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2822     SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2823     SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2824     Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2825     SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
2826     Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
2827     PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2828     Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
2829     PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
2830     Result =
2831         SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
2832                                    {HalfIdx, PackedElt, Vec}),
2833                 0);
2834   }
2835   return Result;
2836 }
2837