1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that VE uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCExpr.h"
16 #include "VECustomDAG.h"
17 #include "VEInstrBuilder.h"
18 #include "VEMachineFunctionInfo.h"
19 #include "VERegisterInfo.h"
20 #include "VETargetMachine.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineModuleInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/SelectionDAG.h"
30 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/KnownBits.h"
37 using namespace llvm;
38
39 #define DEBUG_TYPE "ve-lower"
40
41 //===----------------------------------------------------------------------===//
42 // Calling Convention Implementation
43 //===----------------------------------------------------------------------===//
44
45 #include "VEGenCallingConv.inc"
46
47 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48 switch (CallConv) {
49 default:
50 return RetCC_VE_C;
51 case CallingConv::Fast:
52 return RetCC_VE_Fast;
53 }
54 }
55
56 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57 if (IsVarArg)
58 return CC_VE2;
59 switch (CallConv) {
60 default:
61 return CC_VE_C;
62 case CallingConv::Fast:
63 return CC_VE_Fast;
64 }
65 }
66
67 bool VETargetLowering::CanLowerReturn(
68 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70 CCAssignFn *RetCC = getReturnCC(CallConv);
71 SmallVector<CCValAssign, 16> RVLocs;
72 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73 return CCInfo.CheckReturn(Outs, RetCC);
74 }
75
76 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77 MVT::v256f32, MVT::v512f32, MVT::v256f64};
78
79 static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80
81 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82
83 void VETargetLowering::initRegisterClasses() {
84 // Set up the register classes.
85 addRegisterClass(MVT::i32, &VE::I32RegClass);
86 addRegisterClass(MVT::i64, &VE::I64RegClass);
87 addRegisterClass(MVT::f32, &VE::F32RegClass);
88 addRegisterClass(MVT::f64, &VE::I64RegClass);
89 addRegisterClass(MVT::f128, &VE::F128RegClass);
90
91 if (Subtarget->enableVPU()) {
92 for (MVT VecVT : AllVectorVTs)
93 addRegisterClass(VecVT, &VE::V64RegClass);
94 addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95 addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96 }
97 }
98
99 void VETargetLowering::initSPUActions() {
100 const auto &TM = getTargetMachine();
101 /// Load & Store {
102
103 // VE doesn't have i1 sign extending load.
104 for (MVT VT : MVT::integer_valuetypes()) {
105 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
106 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
107 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
108 setTruncStoreAction(VT, MVT::i1, Expand);
109 }
110
111 // VE doesn't have floating point extload/truncstore, so expand them.
112 for (MVT FPVT : MVT::fp_valuetypes()) {
113 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114 setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115 setTruncStoreAction(FPVT, OtherFPVT, Expand);
116 }
117 }
118
119 // VE doesn't have fp128 load/store, so expand them in custom lower.
120 setOperationAction(ISD::LOAD, MVT::f128, Custom);
121 setOperationAction(ISD::STORE, MVT::f128, Custom);
122
123 /// } Load & Store
124
125 // Custom legalize address nodes into LO/HI parts.
126 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
127 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
128 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
129 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
130 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
131 setOperationAction(ISD::JumpTable, PtrVT, Custom);
132
133 /// VAARG handling {
134 setOperationAction(ISD::VASTART, MVT::Other, Custom);
135 // VAARG needs to be lowered to access memory with 8-byte alignment.
136 setOperationAction(ISD::VAARG, MVT::Other, Custom);
137 // Use the default implementation.
138 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
139 setOperationAction(ISD::VAEND, MVT::Other, Expand);
140 /// } VAARG handling
141
142 /// Stack {
143 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
144 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
145
146 // Use the default implementation.
147 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
148 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
149 /// } Stack
150
151 /// Branch {
152
153 // VE doesn't have BRCOND
154 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
155
156 // BR_JT is not implemented yet.
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158
159 /// } Branch
160
161 /// Int Ops {
162 for (MVT IntVT : {MVT::i32, MVT::i64}) {
163 // VE has no REM or DIVREM operations.
164 setOperationAction(ISD::UREM, IntVT, Expand);
165 setOperationAction(ISD::SREM, IntVT, Expand);
166 setOperationAction(ISD::SDIVREM, IntVT, Expand);
167 setOperationAction(ISD::UDIVREM, IntVT, Expand);
168
169 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170 setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
171 setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
172 setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
173
174 // VE has no MULHU/S or U/SMUL_LOHI operations.
175 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176 setOperationAction(ISD::MULHU, IntVT, Expand);
177 setOperationAction(ISD::MULHS, IntVT, Expand);
178 setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
179 setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
180
181 // VE has no CTTZ, ROTL, ROTR operations.
182 setOperationAction(ISD::CTTZ, IntVT, Expand);
183 setOperationAction(ISD::ROTL, IntVT, Expand);
184 setOperationAction(ISD::ROTR, IntVT, Expand);
185
186 // VE has a 64-bit instruction which works as an i64 BSWAP operation. This
187 // instruction also works as an i32 BSWAP operation with an additional
188 // parameter. Use isel patterns to lower BSWAP.
189 setOperationAction(ISD::BSWAP, IntVT, Legal);
190
191 // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192 // operations. Use isel patterns for i64, promote for i32.
193 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194 setOperationAction(ISD::BITREVERSE, IntVT, Act);
195 setOperationAction(ISD::CTLZ, IntVT, Act);
196 setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
197 setOperationAction(ISD::CTPOP, IntVT, Act);
198
199 // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
200 // Use isel patterns for i64, promote for i32.
201 setOperationAction(ISD::AND, IntVT, Act);
202 setOperationAction(ISD::OR, IntVT, Act);
203 setOperationAction(ISD::XOR, IntVT, Act);
204 }
205 /// } Int Ops
206
207 /// Conversion {
208 // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
209 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
210 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
211 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
212 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
213
214 // fp16 not supported
215 for (MVT FPVT : MVT::fp_valuetypes()) {
216 setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
217 setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
218 }
219 /// } Conversion
220
221 /// Floating-point Ops {
222 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
223 /// and fcmp.
224
225 // VE doesn't have the following floating-point operations.
226 for (MVT VT : MVT::fp_valuetypes()) {
227 setOperationAction(ISD::FNEG, VT, Expand);
228 setOperationAction(ISD::FREM, VT, Expand);
229 }
230
231 // VE doesn't have fdiv of f128.
232 setOperationAction(ISD::FDIV, MVT::f128, Expand);
233
234 for (MVT FPVT : {MVT::f32, MVT::f64}) {
235 // f32 and f64 use ConstantFP. f128 uses ConstantPool.
236 setOperationAction(ISD::ConstantFP, FPVT, Legal);
237 }
238 /// } Floating-point Ops
239
240 /// Floating-point math functions {
241
242 // VE doesn't have the following floating-point math functions.
243 for (MVT VT : MVT::fp_valuetypes()) {
244 setOperationAction(ISD::FABS, VT, Expand);
245 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
246 setOperationAction(ISD::FCOS, VT, Expand);
247 setOperationAction(ISD::FSIN, VT, Expand);
248 setOperationAction(ISD::FSQRT, VT, Expand);
249 }
250
251 /// } Floating-point math functions
252
253 /// Atomic instructions {
254
255 setMaxAtomicSizeInBitsSupported(64);
256 setMinCmpXchgSizeInBits(32);
257 setSupportsUnalignedAtomics(false);
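// Note (a sketch of the expected behavior, not a VE-specific guarantee): with
// the 32-bit cmpxchg minimum above, AtomicExpandPass is expected to widen
// i8/i16 cmpxchg to operate on the containing 32-bit word.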
258
259 // Use custom inserter for ATOMIC_FENCE.
260 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
261
262 // Other atomic instructions.
263 for (MVT VT : MVT::integer_valuetypes()) {
264 // Support i8/i16 atomic swap.
265 setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
266
267 // FIXME: Support "atmam" instructions.
268 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
269 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
270 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
271 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
272
273 // VE doesn't have the following instructions.
274 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
275 setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
276 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
277 setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
278 setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
279 setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
280 setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
281 setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
282 }
283
284 /// } Atomic instructions
285
286 /// SJLJ instructions {
287 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
288 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
289 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
290 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
291 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
292 /// } SJLJ instructions
293
294 // Intrinsic instructions
295 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
296 }
297
298 void VETargetLowering::initVPUActions() {
299 for (MVT LegalMaskVT : AllMaskVTs)
300 setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
301
302 for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
303 setOperationAction(Opc, MVT::v512i1, Custom);
304
305 for (MVT LegalVecVT : AllVectorVTs) {
306 setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
307 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
308 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
309 // Translate all vector instructions with legal element types to VVP_*
310 // nodes.
311 // TODO We will custom-widen into VVP_* nodes in the future. While we are
312 // building the infrastructure for this, we only do this for legal vector
313 // VTs.
314 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
315 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
316 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
317 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
318 setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
319 setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
320 #include "VVPNodes.def"
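// For example, an entry such as ADD_VVP_OP(VVP_ADD, ADD) in VVPNodes.def
// expands through the macro above to:
//   setOperationAction(ISD::ADD, LegalVecVT, Custom);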
321 }
322
323 for (MVT LegalPackedVT : AllPackedVTs) {
324 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
325 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
326 }
327
328 // vNt32, vNt64 ops (legal element types)
329 for (MVT VT : MVT::vector_valuetypes()) {
330 MVT ElemVT = VT.getVectorElementType();
331 unsigned ElemBits = ElemVT.getScalarSizeInBits();
332 if (ElemBits != 32 && ElemBits != 64)
333 continue;
334
335 for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
336 setOperationAction(MemOpc, VT, Custom);
337
338 const ISD::NodeType IntReductionOCs[] = {
339 ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
340 ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
341 ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
342
343 for (unsigned IntRedOpc : IntReductionOCs)
344 setOperationAction(IntRedOpc, VT, Custom);
345 }
346
347 // v256i1 and v512i1 ops
348 for (MVT MaskVT : AllMaskVTs) {
349 // Custom lower mask ops
350 setOperationAction(ISD::STORE, MaskVT, Custom);
351 setOperationAction(ISD::LOAD, MaskVT, Custom);
352 }
353 }
354
355 SDValue
356 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
357 bool IsVarArg,
358 const SmallVectorImpl<ISD::OutputArg> &Outs,
359 const SmallVectorImpl<SDValue> &OutVals,
360 const SDLoc &DL, SelectionDAG &DAG) const {
361 // CCValAssign - represent the assignment of the return value to locations.
362 SmallVector<CCValAssign, 16> RVLocs;
363
364 // CCState - Info about the registers and stack slot.
365 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
366 *DAG.getContext());
367
368 // Analyze return values.
369 CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
370
371 SDValue Flag;
372 SmallVector<SDValue, 4> RetOps(1, Chain);
373
374 // Copy the result values into the output registers.
375 for (unsigned i = 0; i != RVLocs.size(); ++i) {
376 CCValAssign &VA = RVLocs[i];
377 assert(VA.isRegLoc() && "Can only return in registers!");
378 assert(!VA.needsCustom() && "Unexpected custom lowering");
379 SDValue OutVal = OutVals[i];
380
381 // Integer return values must be sign or zero extended by the callee.
382 switch (VA.getLocInfo()) {
383 case CCValAssign::Full:
384 break;
385 case CCValAssign::SExt:
386 OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
387 break;
388 case CCValAssign::ZExt:
389 OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
390 break;
391 case CCValAssign::AExt:
392 OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
393 break;
394 case CCValAssign::BCvt: {
395 // Convert a float return value to i64 with padding.
396 // 63 31 0
397 // +------+------+
398 // | float| 0 |
399 // +------+------+
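      // That is, the f32 payload lives in the upper 32 bits of the i64
      // location (the sub_f32 subregister), with the low 32 bits as padding.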
400 assert(VA.getLocVT() == MVT::i64);
401 assert(VA.getValVT() == MVT::f32);
402 SDValue Undef = SDValue(
403 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
404 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
405 OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
406 MVT::i64, Undef, OutVal, Sub_f32),
407 0);
408 break;
409 }
410 default:
411 llvm_unreachable("Unknown loc info!");
412 }
413
414 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
415
416 // Guarantee that all emitted copies are stuck together with flags.
417 Flag = Chain.getValue(1);
418 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
419 }
420
421 RetOps[0] = Chain; // Update chain.
422
423 // Add the flag if we have it.
424 if (Flag.getNode())
425 RetOps.push_back(Flag);
426
427 return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
428 }
429
430 SDValue VETargetLowering::LowerFormalArguments(
431 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
432 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
433 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
434 MachineFunction &MF = DAG.getMachineFunction();
435
436 // Get the base offset of the incoming arguments stack space.
437 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
438 // Get the size of the preserved arguments area
439 unsigned ArgsPreserved = 64;
440
441 // Analyze arguments according to CC_VE.
442 SmallVector<CCValAssign, 16> ArgLocs;
443 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
444 *DAG.getContext());
445 // Allocate the preserved area first.
446 CCInfo.AllocateStack(ArgsPreserved, Align(8));
447 // We already allocated the preserved area, so the stack offset computed
448 // by CC_VE would be correct now.
449 CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
450
451 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
452 CCValAssign &VA = ArgLocs[i];
453 assert(!VA.needsCustom() && "Unexpected custom lowering");
454 if (VA.isRegLoc()) {
455 // This argument is passed in a register.
456 // All integer register arguments are promoted by the caller to i64.
457
458 // Create a virtual register for the promoted live-in value.
459 Register VReg =
460 MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
461 SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
462
463 // The caller promoted the argument, so insert an Assert?ext SDNode so we
464 // won't promote the value again in this function.
465 switch (VA.getLocInfo()) {
466 case CCValAssign::SExt:
467 Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
468 DAG.getValueType(VA.getValVT()));
469 break;
470 case CCValAssign::ZExt:
471 Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
472 DAG.getValueType(VA.getValVT()));
473 break;
474 case CCValAssign::BCvt: {
475 // Extract a float argument from i64 with padding.
476 // 63 31 0
477 // +------+------+
478 // | float| 0 |
479 // +------+------+
480 assert(VA.getLocVT() == MVT::i64);
481 assert(VA.getValVT() == MVT::f32);
482 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
483 Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
484 MVT::f32, Arg, Sub_f32),
485 0);
486 break;
487 }
488 default:
489 break;
490 }
491
492 // Truncate the register down to the argument type.
493 if (VA.isExtInLoc())
494 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
495
496 InVals.push_back(Arg);
497 continue;
498 }
499
500 // The registers are exhausted. This argument was passed on the stack.
501 assert(VA.isMemLoc());
502 // The CC_VE_Full/Half functions compute stack offsets relative to the
503 // beginning of the arguments area at %fp + the size of the reserved area.
504 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
505 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
506
507 // Adjust the offset for a float argument by adding 4, since the argument
508 // is stored in an 8-byte slot laid out as below. LLVM generates a 4-byte
509 // load instruction, so the offset needs adjusting here. This adjustment
510 // is required only in LowerFormalArguments. In LowerCall, a float
511 // argument is first converted to i64 and stored as 8 bytes of data as
512 // required by the ABI, so no adjustment is needed there.
513 // 0 4
514 // +------+------+
515 // | empty| float|
516 // +------+------+
517 if (VA.getValVT() == MVT::f32)
518 Offset += 4;
519
520 int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
521 InVals.push_back(
522 DAG.getLoad(VA.getValVT(), DL, Chain,
523 DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
524 MachinePointerInfo::getFixedStack(MF, FI)));
525 }
526
527 if (!IsVarArg)
528 return Chain;
529
530 // This function takes variable arguments, some of which may have been passed
531 // in registers %s0-%s8.
532 //
533 // The va_start intrinsic needs to know the offset to the first variable
534 // argument.
535 // TODO: need to calculate offset correctly once we support f128.
536 unsigned ArgOffset = ArgLocs.size() * 8;
537 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
538 // Skip the reserved area at the top of stack.
539 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
540
541 return Chain;
542 }
543
544 // FIXME? Maybe this could be a TableGen attribute on some registers and
545 // this table could be generated automatically from RegInfo.
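// This hook backs the named-register extension (llvm.read_register /
// llvm.write_register). Roughly, IR such as
//   %sp = call i64 @llvm.read_register.i64(metadata !0)   ; !0 = !{!"sp"}
// resolves "sp" to VE::SX11 through the table below.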
546 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
547 const MachineFunction &MF) const {
548 Register Reg = StringSwitch<Register>(RegName)
549 .Case("sp", VE::SX11) // Stack pointer
550 .Case("fp", VE::SX9) // Frame pointer
551 .Case("sl", VE::SX8) // Stack limit
552 .Case("lr", VE::SX10) // Link register
553 .Case("tp", VE::SX14) // Thread pointer
554 .Case("outer", VE::SX12) // Outer register
555 .Case("info", VE::SX17) // Info area register
556 .Case("got", VE::SX15) // Global offset table register
557 .Case("plt", VE::SX16) // Procedure linkage table register
558 .Default(0);
559
560 if (Reg)
561 return Reg;
562
563 report_fatal_error("Invalid register name global variable");
564 }
565
566 //===----------------------------------------------------------------------===//
567 // TargetLowering Implementation
568 //===----------------------------------------------------------------------===//
569
570 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
571 SmallVectorImpl<SDValue> &InVals) const {
572 SelectionDAG &DAG = CLI.DAG;
573 SDLoc DL = CLI.DL;
574 SDValue Chain = CLI.Chain;
575 auto PtrVT = getPointerTy(DAG.getDataLayout());
576
577 // VE target does not yet support tail call optimization.
578 CLI.IsTailCall = false;
579
580 // Get the base offset of the outgoing arguments stack space.
581 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
582 // Get the size of the preserved arguments area
583 unsigned ArgsPreserved = 8 * 8u;
584
585 // Analyze operands of the call, assigning locations to each operand.
586 SmallVector<CCValAssign, 16> ArgLocs;
587 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
588 *DAG.getContext());
589 // Allocate the preserved area first.
590 CCInfo.AllocateStack(ArgsPreserved, Align(8));
591 // We already allocated the preserved area, so the stack offset computed
592 // by CC_VE would be correct now.
593 CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
594
595 // VE requires both registers and the stack to be used for varargs or
596 // unprototyped functions.
597 bool UseBoth = CLI.IsVarArg;
598
599 // Analyze operands again if it is required to store BOTH.
600 SmallVector<CCValAssign, 16> ArgLocs2;
601 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
602 ArgLocs2, *DAG.getContext());
603 if (UseBoth)
604 CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
605
606 // Get the size of the outgoing arguments stack space requirement.
607 unsigned ArgsSize = CCInfo.getNextStackOffset();
608
609 // Keep stack frames 16-byte aligned.
610 ArgsSize = alignTo(ArgsSize, 16);
611
612 // Adjust the stack pointer to make room for the arguments.
613 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
614 // with more than 6 arguments.
615 Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
616
617 // Collect the set of registers to pass to the function and their values.
618 // This will be emitted as a sequence of CopyToReg nodes glued to the call
619 // instruction.
620 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
621
622 // Collect chains from all the memory operations that copy arguments to the
623 // stack. They must follow the stack pointer adjustment above and precede the
624 // call instruction itself.
625 SmallVector<SDValue, 8> MemOpChains;
626
627 // VE needs to get the address of the callee function in a register,
628 // so prepare to copy it to SX12 here.
629
630 // If the callee is a GlobalAddress node (quite common, every direct call is)
631 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
632 // Likewise ExternalSymbol -> TargetExternalSymbol.
633 SDValue Callee = CLI.Callee;
634
635 bool IsPICCall = isPositionIndependent();
636
637 // PC-relative references to external symbols should go through $stub.
638 // If so, we need to prepare GlobalBaseReg first.
639 const TargetMachine &TM = DAG.getTarget();
640 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
641 const GlobalValue *GV = nullptr;
642 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
643 if (CalleeG)
644 GV = CalleeG->getGlobal();
645 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
646 bool UsePlt = !Local;
647 MachineFunction &MF = DAG.getMachineFunction();
648
649 // Turn GlobalAddress/ExternalSymbol node into a value node
650 // containing the address of them here.
651 if (CalleeG) {
652 if (IsPICCall) {
653 if (UsePlt)
654 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
655 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
656 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
657 } else {
658 Callee =
659 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
660 }
661 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
662 if (IsPICCall) {
663 if (UsePlt)
664 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
665 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
666 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
667 } else {
668 Callee =
669 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
670 }
671 }
672
673 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
674
675 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
676 CCValAssign &VA = ArgLocs[i];
677 SDValue Arg = CLI.OutVals[i];
678
679 // Promote the value if needed.
680 switch (VA.getLocInfo()) {
681 default:
682 llvm_unreachable("Unknown location info!");
683 case CCValAssign::Full:
684 break;
685 case CCValAssign::SExt:
686 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
687 break;
688 case CCValAssign::ZExt:
689 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
690 break;
691 case CCValAssign::AExt:
692 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
693 break;
694 case CCValAssign::BCvt: {
695 // Convert a float argument to i64 with padding.
696 // 63 31 0
697 // +------+------+
698 // | float| 0 |
699 // +------+------+
700 assert(VA.getLocVT() == MVT::i64);
701 assert(VA.getValVT() == MVT::f32);
702 SDValue Undef = SDValue(
703 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
704 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
705 Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
706 MVT::i64, Undef, Arg, Sub_f32),
707 0);
708 break;
709 }
710 }
711
712 if (VA.isRegLoc()) {
713 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
714 if (!UseBoth)
715 continue;
716 VA = ArgLocs2[i];
717 }
718
719 assert(VA.isMemLoc());
720
721 // Create a store off the stack pointer for this argument.
722 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
723 // The argument area starts at %fp/%sp + the size of reserved area.
724 SDValue PtrOff =
725 DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
726 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
727 MemOpChains.push_back(
728 DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
729 }
730
731 // Emit all stores, make sure they occur before the call.
732 if (!MemOpChains.empty())
733 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
734
735 // Build a sequence of CopyToReg nodes glued together with token chain and
736 // glue operands which copy the outgoing args into registers. The InGlue is
737 // necessary since all emitted instructions must be stuck together in order
738 // to pass the live physical registers.
739 SDValue InGlue;
740 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
741 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
742 RegsToPass[i].second, InGlue);
743 InGlue = Chain.getValue(1);
744 }
745
746 // Build the operands for the call instruction itself.
747 SmallVector<SDValue, 8> Ops;
748 Ops.push_back(Chain);
749 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
750 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
751 RegsToPass[i].second.getValueType()));
752
753 // Add a register mask operand representing the call-preserved registers.
754 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
755 const uint32_t *Mask =
756 TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
757 assert(Mask && "Missing call preserved mask for calling convention");
758 Ops.push_back(DAG.getRegisterMask(Mask));
759
760 // Make sure the CopyToReg nodes are glued to the call instruction which
761 // consumes the registers.
762 if (InGlue.getNode())
763 Ops.push_back(InGlue);
764
765 // Now the call itself.
766 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
767 Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
768 InGlue = Chain.getValue(1);
769
770 // Revert the stack pointer immediately after the call.
771 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
772 DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
773 InGlue = Chain.getValue(1);
774
775 // Now extract the return values. This is more or less the same as
776 // LowerFormalArguments.
777
778 // Assign locations to each value returned by this call.
779 SmallVector<CCValAssign, 16> RVLocs;
780 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
781 *DAG.getContext());
782
783 // Set inreg flag manually for codegen generated library calls that
784 // return float.
785 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
786 CLI.Ins[0].Flags.setInReg();
787
788 RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
789
790 // Copy all of the result registers out of their specified physreg.
791 for (unsigned i = 0; i != RVLocs.size(); ++i) {
792 CCValAssign &VA = RVLocs[i];
793 assert(!VA.needsCustom() && "Unexpected custom lowering");
794 Register Reg = VA.getLocReg();
795
796 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
797 // reside in the same register in the high and low bits. Reuse the
798 // CopyFromReg previous node to avoid duplicate copies.
799 SDValue RV;
800 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
801 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
802 RV = Chain.getValue(0);
803
804 // But usually we'll create a new CopyFromReg for a different register.
805 if (!RV.getNode()) {
806 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
807 Chain = RV.getValue(1);
808 InGlue = Chain.getValue(2);
809 }
810
811 // The callee promoted the return value, so insert an Assert?ext SDNode so
812 // we won't promote the value again in this function.
813 switch (VA.getLocInfo()) {
814 case CCValAssign::SExt:
815 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
816 DAG.getValueType(VA.getValVT()));
817 break;
818 case CCValAssign::ZExt:
819 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
820 DAG.getValueType(VA.getValVT()));
821 break;
822 case CCValAssign::BCvt: {
823 // Extract a float return value from i64 with padding.
824 // 63 31 0
825 // +------+------+
826 // | float| 0 |
827 // +------+------+
828 assert(VA.getLocVT() == MVT::i64);
829 assert(VA.getValVT() == MVT::f32);
830 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
831 RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
832 MVT::f32, RV, Sub_f32),
833 0);
834 break;
835 }
836 default:
837 break;
838 }
839
840 // Truncate the register down to the return value type.
841 if (VA.isExtInLoc())
842 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
843
844 InVals.push_back(RV);
845 }
846
847 return Chain;
848 }
849
850 bool VETargetLowering::isOffsetFoldingLegal(
851 const GlobalAddressSDNode *GA) const {
852 // VE uses 64-bit addressing, so we need multiple instructions to generate
853 // an address. Folding an address with an offset increases the number of
854 // instructions, so we disable it here. Offsets will be folded in the DAG
855 // combine later if it is worth doing so.
856 return false;
857 }
858
859 /// isFPImmLegal - Returns true if the target can instruction select the
860 /// specified FP immediate natively. If false, the legalizer will
861 /// materialize the FP immediate as a load from a constant pool.
862 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
863 bool ForCodeSize) const {
864 return VT == MVT::f32 || VT == MVT::f64;
865 }
866
867 /// Determine if the target supports unaligned memory accesses.
868 ///
869 /// This function returns true if the target allows unaligned memory accesses
870 /// of the specified type in the given address space. If true, it also returns
871 /// whether the unaligned memory access is "fast" in the last argument by
872 /// reference. This is used, for example, in situations where an array
873 /// copy/move/set is converted to a sequence of store operations. Its use
874 /// helps to ensure that such replacements don't generate code that causes an
875 /// alignment error (trap) on the target machine.
876 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
877 unsigned AddrSpace,
878 Align A,
879 MachineMemOperand::Flags,
880 bool *Fast) const {
881 if (Fast) {
882 // It's always fast on VE.
883 *Fast = true;
884 }
885 return true;
886 }
887
888 VETargetLowering::VETargetLowering(const TargetMachine &TM,
889 const VESubtarget &STI)
890 : TargetLowering(TM), Subtarget(&STI) {
891 // Instructions which use registers as conditionals examine all the
892 // bits (as does the pseudo SELECT_CC expansion). I don't think it
893 // matters much whether it's ZeroOrOneBooleanContent, or
894 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
895 // former.
896 setBooleanContents(ZeroOrOneBooleanContent);
897 setBooleanVectorContents(ZeroOrOneBooleanContent);
898
899 initRegisterClasses();
900 initSPUActions();
901 initVPUActions();
902
903 setStackPointerRegisterToSaveRestore(VE::SX11);
904
905 // We have target-specific dag combine patterns for the following nodes:
906 setTargetDAGCombine(ISD::TRUNCATE);
907
908 // Set function alignment to 16 bytes
909 setMinFunctionAlignment(Align(16));
910
911 // VE stores all arguments with 8-byte alignment.
912 setMinStackArgumentAlignment(Align(8));
913
914 computeRegisterProperties(Subtarget->getRegisterInfo());
915 }
916
917 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
918 #define TARGET_NODE_CASE(NAME) \
919 case VEISD::NAME: \
920 return "VEISD::" #NAME;
921 switch ((VEISD::NodeType)Opcode) {
922 case VEISD::FIRST_NUMBER:
923 break;
924 TARGET_NODE_CASE(CALL)
925 TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
926 TARGET_NODE_CASE(EH_SJLJ_SETJMP)
927 TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
928 TARGET_NODE_CASE(GETFUNPLT)
929 TARGET_NODE_CASE(GETSTACKTOP)
930 TARGET_NODE_CASE(GETTLSADDR)
931 TARGET_NODE_CASE(GLOBAL_BASE_REG)
932 TARGET_NODE_CASE(Hi)
933 TARGET_NODE_CASE(Lo)
934 TARGET_NODE_CASE(MEMBARRIER)
935 TARGET_NODE_CASE(RET_FLAG)
936 TARGET_NODE_CASE(TS1AM)
937 TARGET_NODE_CASE(VEC_UNPACK_LO)
938 TARGET_NODE_CASE(VEC_UNPACK_HI)
939 TARGET_NODE_CASE(VEC_PACK)
940 TARGET_NODE_CASE(VEC_BROADCAST)
941 TARGET_NODE_CASE(REPL_I32)
942 TARGET_NODE_CASE(REPL_F32)
943
944 TARGET_NODE_CASE(LEGALAVL)
945
946 // Register the VVP_* SDNodes.
947 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
948 #include "VVPNodes.def"
949 }
950 #undef TARGET_NODE_CASE
951 return nullptr;
952 }
953
954 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
955 EVT VT) const {
956 return MVT::i32;
957 }
958
959 // Convert to a target node and set target flags.
960 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
961 SelectionDAG &DAG) const {
962 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
963 return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
964 GA->getValueType(0), GA->getOffset(), TF);
965
966 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
967 return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
968 0, TF);
969
970 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
971 return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
972 CP->getAlign(), CP->getOffset(), TF);
973
974 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
975 return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
976 TF);
977
978 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
979 return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
980
981 llvm_unreachable("Unhandled address SDNode");
982 }
983
984 // Split Op into high and low parts according to HiTF and LoTF.
985 // Return an ADD node combining the parts.
986 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
987 SelectionDAG &DAG) const {
988 SDLoc DL(Op);
989 EVT VT = Op.getValueType();
990 SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
991 SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
992 return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
993 }
994
995 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
996 // or ExternalSymbol SDNode.
997 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
998 SDLoc DL(Op);
999 EVT PtrVT = Op.getValueType();
1000
1001 // Handle PIC mode first. VE needs a got load for every variable!
1002 if (isPositionIndependent()) {
1003 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
1004
1005 if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
1006 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1007 // Create the following instructions for local-linkage PIC code.
1008 // lea %reg, label@gotoff_lo
1009 // and %reg, %reg, (32)0
1010 // lea.sl %reg, label@gotoff_hi(%reg, %got)
1011 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1012 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1013 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1014 return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1015 }
1016 // Create the following instructions for non-local-linkage PIC code.
1017 // lea %reg, label@got_lo
1018 // and %reg, %reg, (32)0
1019 // lea.sl %reg, label@got_hi(%reg)
1020 // ld %reg, (%reg, %got)
1021 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
1022 VEMCExpr::VK_VE_GOT_LO32, DAG);
1023 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1024 SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1025 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1026 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
1027 }
1028
1029 // This is one of the absolute code models.
1030 switch (getTargetMachine().getCodeModel()) {
1031 default:
1032 llvm_unreachable("Unsupported absolute code model");
1033 case CodeModel::Small:
1034 case CodeModel::Medium:
1035 case CodeModel::Large:
1036 // abs64.
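// A sketch of the expected sequence, by analogy with the PIC cases above:
//   lea %reg, sym@lo
//   and %reg, %reg, (32)0
//   lea.sl %reg, sym@hi(, %reg)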
1037 return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1038 }
1039 }
1040
1041 /// Custom Lower {
1042
1043 // The mappings for emitLeadingFence/emitTrailingFence for VE are designed
1044 // following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
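// For example (a sketch based on the functions below): a seq_cst atomic store
// would get a leading seq_cst fence (lowered to "fencem 3" in
// lowerATOMIC_FENCE) and a trailing seq_cst fence as well.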
1045 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1046 Instruction *Inst,
1047 AtomicOrdering Ord) const {
1048 switch (Ord) {
1049 case AtomicOrdering::NotAtomic:
1050 case AtomicOrdering::Unordered:
1051 llvm_unreachable("Invalid fence: unordered/non-atomic");
1052 case AtomicOrdering::Monotonic:
1053 case AtomicOrdering::Acquire:
1054 return nullptr; // Nothing to do
1055 case AtomicOrdering::Release:
1056 case AtomicOrdering::AcquireRelease:
1057 return Builder.CreateFence(AtomicOrdering::Release);
1058 case AtomicOrdering::SequentiallyConsistent:
1059 if (!Inst->hasAtomicStore())
1060 return nullptr; // Nothing to do
1061 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1062 }
1063 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1064 }
1065
1066 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1067 Instruction *Inst,
1068 AtomicOrdering Ord) const {
1069 switch (Ord) {
1070 case AtomicOrdering::NotAtomic:
1071 case AtomicOrdering::Unordered:
1072 llvm_unreachable("Invalid fence: unordered/not-atomic");
1073 case AtomicOrdering::Monotonic:
1074 case AtomicOrdering::Release:
1075 return nullptr; // Nothing to do
1076 case AtomicOrdering::Acquire:
1077 case AtomicOrdering::AcquireRelease:
1078 return Builder.CreateFence(AtomicOrdering::Acquire);
1079 case AtomicOrdering::SequentiallyConsistent:
1080 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1081 }
1082 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1083 }
1084
1085 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1086 SelectionDAG &DAG) const {
1087 SDLoc DL(Op);
1088 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
1089 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
1090 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
1091 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
1092
1093 // VE uses Release consistency, so we need a fence instruction if it is a
1094 // cross-thread fence.
1095 if (FenceSSID == SyncScope::System) {
1096 switch (FenceOrdering) {
1097 case AtomicOrdering::NotAtomic:
1098 case AtomicOrdering::Unordered:
1099 case AtomicOrdering::Monotonic:
1100 // No need to generate fencem instruction here.
1101 break;
1102 case AtomicOrdering::Acquire:
1103 // Generate "fencem 2" as acquire fence.
1104 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1105 DAG.getTargetConstant(2, DL, MVT::i32),
1106 Op.getOperand(0)),
1107 0);
1108 case AtomicOrdering::Release:
1109 // Generate "fencem 1" as release fence.
1110 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1111 DAG.getTargetConstant(1, DL, MVT::i32),
1112 Op.getOperand(0)),
1113 0);
1114 case AtomicOrdering::AcquireRelease:
1115 case AtomicOrdering::SequentiallyConsistent:
1116 // Generate "fencem 3" as acq_rel and seq_cst fence.
1117 // FIXME: "fencem 3" doesn't wait for accesses from PCIe devices,
1118 // so seq_cst may require more instructions for them.
1119 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1120 DAG.getTargetConstant(3, DL, MVT::i32),
1121 Op.getOperand(0)),
1122 0);
1123 }
1124 }
1125
1126 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1127 return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1128 }
1129
1130 TargetLowering::AtomicExpansionKind
1131 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1132 // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1133 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1134 return AtomicExpansionKind::None;
1135 }
1136 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1137
1138 // Otherwise, expand it using a compare-and-exchange instruction so that
1139 // the __sync_fetch_and_* functions are not called.
1140 return AtomicExpansionKind::CmpXChg;
1141 }
1142
1143 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1144 SDValue &Bits) {
1145 SDLoc DL(Op);
1146 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1147 SDValue Ptr = N->getOperand(1);
1148 SDValue Val = N->getOperand(2);
1149 EVT PtrVT = Ptr.getValueType();
1150 bool Byte = N->getMemoryVT() == MVT::i8;
1151 // Remainder = AND Ptr, 3
1152 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1153 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1154 // Bits = Remainder << 3
1155 // NewVal = Val << Bits
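// For example (a sketch): an i8 swap at an address whose low two bits are
// 0b10 gives Remainder = 2, Flag = 1 << 2 = 4, Bits = 16, so Val is shifted
// into byte 2 of the 4-byte word that TS1AM updates.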
1156 SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1157 SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1158 SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1159 : DAG.getConstant(3, DL, MVT::i32);
1160 Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1161 Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1162 return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1163 }
1164
1165 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1166 SDValue Bits) {
1167 SDLoc DL(Op);
1168 EVT VT = Data.getValueType();
1169 bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1170 // NewData = Data >> Bits
1171 // Result = NewData & 0xff ; If Byte is true (1 byte)
1172 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1173
1174 SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1175 return DAG.getNode(ISD::AND, DL, VT,
1176 {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1177 }
1178
1179 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1180 SelectionDAG &DAG) const {
1181 SDLoc DL(Op);
1182 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1183
1184 if (N->getMemoryVT() == MVT::i8) {
1185 // For i8, use "ts1am"
1186 // Input:
1187 // ATOMIC_SWAP Ptr, Val, Order
1188 //
1189 // Output:
1190 // Remainder = AND Ptr, 3
1191 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1192 // Bits = Remainder << 3
1193 // NewVal = Val << Bits
1194 //
1195 // Aligned = AND Ptr, -4
1196 // Data = TS1AM Aligned, Flag, NewVal
1197 //
1198 // NewData = Data >> Bits
1199 // Result = NewData & 0xff ; 1 byte result
1200 SDValue Flag;
1201 SDValue Bits;
1202 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1203
1204 SDValue Ptr = N->getOperand(1);
1205 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1206 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1207 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1208 DAG.getVTList(Op.getNode()->getValueType(0),
1209 Op.getNode()->getValueType(1)),
1210 {N->getChain(), Aligned, Flag, NewVal},
1211 N->getMemOperand());
1212
1213 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1214 SDValue Chain = TS1AM.getValue(1);
1215 return DAG.getMergeValues({Result, Chain}, DL);
1216 }
1217 if (N->getMemoryVT() == MVT::i16) {
1218 // For i16, use "ts1am"
1219 SDValue Flag;
1220 SDValue Bits;
1221 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1222
1223 SDValue Ptr = N->getOperand(1);
1224 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1225 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1226 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1227 DAG.getVTList(Op.getNode()->getValueType(0),
1228 Op.getNode()->getValueType(1)),
1229 {N->getChain(), Aligned, Flag, NewVal},
1230 N->getMemOperand());
1231
1232 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1233 SDValue Chain = TS1AM.getValue(1);
1234 return DAG.getMergeValues({Result, Chain}, DL);
1235 }
1236 // Otherwise, let llvm legalize it.
1237 return Op;
1238 }
1239
1240 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1241 SelectionDAG &DAG) const {
1242 return makeAddress(Op, DAG);
1243 }
1244
1245 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1246 SelectionDAG &DAG) const {
1247 return makeAddress(Op, DAG);
1248 }
1249
1250 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1251 SelectionDAG &DAG) const {
1252 return makeAddress(Op, DAG);
1253 }
1254
1255 SDValue
1256 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1257 SelectionDAG &DAG) const {
1258 SDLoc DL(Op);
1259
1260 // Generate the following code:
1261 // t1: ch,glue = callseq_start t0, 0, 0
1262 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1263 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1264 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1265 SDValue Label = withTargetFlags(Op, 0, DAG);
1266 EVT PtrVT = Op.getValueType();
1267
1268 // Lowering the machine isd will make sure everything is in the right
1269 // location.
1270 SDValue Chain = DAG.getEntryNode();
1271 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1272 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1273 DAG.getMachineFunction(), CallingConv::C);
1274 Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1275 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1276 Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1277 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
1278 DAG.getIntPtrConstant(0, DL, true),
1279 Chain.getValue(1), DL);
1280 Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1281
1282 // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function has calls.
1283 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1284 MFI.setHasCalls(true);
1285
1286 // Also generate code to prepare a GOT register if it is PIC.
1287 if (isPositionIndependent()) {
1288 MachineFunction &MF = DAG.getMachineFunction();
1289 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1290 }
1291
1292 return Chain;
1293 }
1294
1295 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1296 SelectionDAG &DAG) const {
1297 // The current implementation of nld (2.26) doesn't allow local exec model
1298 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1299 // generate the general dynamic model code sequence.
1300 //
1301 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1302 return lowerToTLSGeneralDynamicModel(Op, DAG);
1303 }
1304
1305 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1306 return makeAddress(Op, DAG);
1307 }
1308
1309 // Lower a f128 load into two f64 loads.
1310 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1311 SDLoc DL(Op);
1312 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1313 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1314 unsigned Alignment = LdNode->getAlign().value();
1315 if (Alignment > 8)
1316 Alignment = 8;
1317
1318 SDValue Lo64 =
1319 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1320 LdNode->getPointerInfo(), Alignment,
1321 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1322 : MachineMemOperand::MONone);
1323 EVT AddrVT = LdNode->getBasePtr().getValueType();
1324 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1325 DAG.getConstant(8, DL, AddrVT));
1326 SDValue Hi64 =
1327 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1328 LdNode->getPointerInfo(), Alignment,
1329 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1330 : MachineMemOperand::MONone);
1331
1332 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1333 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1334
1335 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1336 SDNode *InFP128 =
1337 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1338 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1339 SDValue(InFP128, 0), Hi64, SubRegEven);
1340 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1341 SDValue(InFP128, 0), Lo64, SubRegOdd);
1342 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1343 SDValue(Hi64.getNode(), 1)};
1344 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1345 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1346 return DAG.getMergeValues(Ops, DL);
1347 }
1348
1349 // Lower a vXi1 load into the following instructions:
1350 // LDrii %1, (,%addr)
1351 // LVMxir %vm, 0, %1
1352 // LDrii %2, 8(,%addr)
1353 // LVMxir %vm, 0, %2
1354 // ...
1355 static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1356 SDLoc DL(Op);
1357 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1358 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1359
1360 SDValue BasePtr = LdNode->getBasePtr();
1361 unsigned Alignment = LdNode->getAlign().value();
1362 if (Alignment > 8)
1363 Alignment = 8;
1364
1365 EVT AddrVT = BasePtr.getValueType();
1366 EVT MemVT = LdNode->getMemoryVT();
1367 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1368 SDValue OutChains[4];
1369 SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1370 for (int i = 0; i < 4; ++i) {
1371 // Generate load dag and prepare chains.
1372 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1373 DAG.getConstant(8 * i, DL, AddrVT));
1374 SDValue Val =
1375 DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1376 LdNode->getPointerInfo(), Alignment,
1377 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1378 : MachineMemOperand::MONone);
1379 OutChains[i] = SDValue(Val.getNode(), 1);
1380
1381 VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1382 DAG.getTargetConstant(i, DL, MVT::i64), Val,
1383 SDValue(VM, 0));
1384 }
1385 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1386 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1387 return DAG.getMergeValues(Ops, DL);
1388 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1389 SDValue OutChains[8];
1390 SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1391 for (int i = 0; i < 8; ++i) {
1392 // Generate load dag and prepare chains.
1393 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1394 DAG.getConstant(8 * i, DL, AddrVT));
1395 SDValue Val =
1396 DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1397 LdNode->getPointerInfo(), Alignment,
1398 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1399 : MachineMemOperand::MONone);
1400 OutChains[i] = SDValue(Val.getNode(), 1);
1401
1402 VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1403 DAG.getTargetConstant(i, DL, MVT::i64), Val,
1404 SDValue(VM, 0));
1405 }
1406 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1407 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1408 return DAG.getMergeValues(Ops, DL);
1409 } else {
1410 // Otherwise, ask llvm to expand it.
1411 return SDValue();
1412 }
1413 }
1414
1415 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1416 LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1417
1418 EVT MemVT = LdNode->getMemoryVT();
1419
1420 // Dispatch to vector isel.
1421 if (MemVT.isVector() && !isMaskType(MemVT))
1422 return lowerToVVP(Op, DAG);
1423
1424 SDValue BasePtr = LdNode->getBasePtr();
1425 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1426     // Do not expand load instructions with a frame index here because of
1427     // dependency problems. We expand them later in eliminateFrameIndex().
1428 return Op;
1429 }
1430
1431 if (MemVT == MVT::f128)
1432 return lowerLoadF128(Op, DAG);
1433 if (isMaskType(MemVT))
1434 return lowerLoadI1(Op, DAG);
1435
1436 return Op;
1437 }
1438
1439 // Lower a f128 store into two f64 stores.
1440 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1441 SDLoc DL(Op);
1442 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1443 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1444
1445 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1446 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1447
1448 SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1449 StNode->getValue(), SubRegEven);
1450 SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1451 StNode->getValue(), SubRegOdd);
1452
1453 unsigned Alignment = StNode->getAlign().value();
1454 if (Alignment > 8)
1455 Alignment = 8;
1456
1457 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1458 SDValue OutChains[2];
1459 OutChains[0] =
1460 DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1461 StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1462 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1463 : MachineMemOperand::MONone);
1464 EVT AddrVT = StNode->getBasePtr().getValueType();
1465 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1466 DAG.getConstant(8, DL, AddrVT));
1467 OutChains[1] =
1468 DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1469 MachinePointerInfo(), Alignment,
1470 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1471 : MachineMemOperand::MONone);
1472 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1473 }
1474
1475 // Lower a vXi1 store into the following instructions:
1476 // SVMi %1, %vm, 0
1477 // STrii %1, (,%addr)
1478 // SVMi %2, %vm, 1
1479 // STrii %2, 8(,%addr)
1480 // ...
1481 static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1482 SDLoc DL(Op);
1483 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1484 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1485
1486 SDValue BasePtr = StNode->getBasePtr();
1487 unsigned Alignment = StNode->getAlign().value();
1488 if (Alignment > 8)
1489 Alignment = 8;
1490 EVT AddrVT = BasePtr.getValueType();
1491 EVT MemVT = StNode->getMemoryVT();
1492 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1493 SDValue OutChains[4];
1494 for (int i = 0; i < 4; ++i) {
1495 SDNode *V =
1496 DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1497 DAG.getTargetConstant(i, DL, MVT::i64));
1498 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1499 DAG.getConstant(8 * i, DL, AddrVT));
1500 OutChains[i] =
1501 DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1502 MachinePointerInfo(), Alignment,
1503 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1504 : MachineMemOperand::MONone);
1505 }
1506 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1507 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1508 SDValue OutChains[8];
1509 for (int i = 0; i < 8; ++i) {
1510 SDNode *V =
1511 DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1512 DAG.getTargetConstant(i, DL, MVT::i64));
1513 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1514 DAG.getConstant(8 * i, DL, AddrVT));
1515 OutChains[i] =
1516 DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1517 MachinePointerInfo(), Alignment,
1518 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1519 : MachineMemOperand::MONone);
1520 }
1521 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1522 } else {
1523 // Otherwise, ask llvm to expand it.
1524 return SDValue();
1525 }
1526 }
1527
1528 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1529 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1530 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1531
1532   // Always expand non-mask vector stores to VVP.
1533 EVT MemVT = StNode->getMemoryVT();
1534 if (MemVT.isVector() && !isMaskType(MemVT))
1535 return lowerToVVP(Op, DAG);
1536
1537 SDValue BasePtr = StNode->getBasePtr();
1538 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1539     // Do not expand store instructions with a frame index here because of
1540     // dependency problems. We expand them later in eliminateFrameIndex().
1541 return Op;
1542 }
1543
1544 if (MemVT == MVT::f128)
1545 return lowerStoreF128(Op, DAG);
1546 if (isMaskType(MemVT))
1547 return lowerStoreI1(Op, DAG);
1548
1549 // Otherwise, ask llvm to expand it.
1550 return SDValue();
1551 }
1552
1553 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1554 MachineFunction &MF = DAG.getMachineFunction();
1555 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1556 auto PtrVT = getPointerTy(DAG.getDataLayout());
1557
1558 // Need frame address to find the address of VarArgsFrameIndex.
1559 MF.getFrameInfo().setFrameAddressIsTaken(true);
1560
1561 // vastart just stores the address of the VarArgsFrameIndex slot into the
1562 // memory location argument.
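  // On VE, SX9 is the frame pointer, and the vararg save area lives at
  // VarArgsFrameOffset from it.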
1563 SDLoc DL(Op);
1564 SDValue Offset =
1565 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1566 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1567 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1568 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1569 MachinePointerInfo(SV));
1570 }
1571
1572 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1573 SDNode *Node = Op.getNode();
1574 EVT VT = Node->getValueType(0);
1575 SDValue InChain = Node->getOperand(0);
1576 SDValue VAListPtr = Node->getOperand(1);
1577 EVT PtrVT = VAListPtr.getValueType();
1578 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1579 SDLoc DL(Node);
1580 SDValue VAList =
1581 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1582 SDValue Chain = VAList.getValue(1);
1583 SDValue NextPtr;
1584
1585 if (VT == MVT::f128) {
1586     // VE f128 values must be stored with 16-byte alignment. We don't
1587     // know the actual alignment of VAList, so we compute an aligned
1588     // address from it dynamically.
1589 int Align = 16;
1590 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1591 DAG.getConstant(Align - 1, DL, PtrVT));
1592 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1593 DAG.getConstant(-Align, DL, PtrVT));
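    // For example, a VAList of 0x...18 becomes (0x...18 + 15) & ~15 = 0x...20.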
1594 // Increment the pointer, VAList, by 16 to the next vaarg.
1595 NextPtr =
1596 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1597 } else if (VT == MVT::f32) {
1598     // float --> needs special handling as shown below:
1599 // 0 4
1600 // +------+------+
1601 // | empty| float|
1602 // +------+------+
1603 // Increment the pointer, VAList, by 8 to the next vaarg.
1604 NextPtr =
1605 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1606 // Then, adjust VAList.
1607 unsigned InternalOffset = 4;
1608 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1609 DAG.getConstant(InternalOffset, DL, PtrVT));
1610 } else {
1611 // Increment the pointer, VAList, by 8 to the next vaarg.
1612 NextPtr =
1613 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1614 }
1615
1616 // Store the incremented VAList to the legalized pointer.
1617 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1618
1619 // Load the actual argument out of the pointer VAList.
1620 // We can't count on greater alignment than the word size.
1621 return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
1622 std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
1623 }
1624
1625 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1626 SelectionDAG &DAG) const {
1627   // Generate the following code:
1628   //   (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, -align)
1629   //   ret = GETSTACKTOP;           // pseudo instruction
1630 SDLoc DL(Op);
1631
1632 // Get the inputs.
1633 SDNode *Node = Op.getNode();
1634 SDValue Chain = Op.getOperand(0);
1635 SDValue Size = Op.getOperand(1);
1636 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1637 EVT VT = Node->getValueType(0);
1638
1639 // Chain the dynamic stack allocation so that it doesn't modify the stack
1640 // pointer when other instructions are using the stack.
1641 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1642
1643 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1644 Align StackAlign = TFI.getStackAlign();
1645 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1646
1647 // Prepare arguments
1648 TargetLowering::ArgListTy Args;
1649 TargetLowering::ArgListEntry Entry;
1650 Entry.Node = Size;
1651 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1652 Args.push_back(Entry);
1653 if (NeedsAlign) {
1654 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1655 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1656 Args.push_back(Entry);
1657 }
1658 Type *RetTy = Type::getVoidTy(*DAG.getContext());
1659
1660 EVT PtrVT = Op.getValueType();
1661 SDValue Callee;
1662 if (NeedsAlign) {
1663 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1664 } else {
1665 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1666 }
1667
1668 TargetLowering::CallLoweringInfo CLI(DAG);
1669 CLI.setDebugLoc(DL)
1670 .setChain(Chain)
1671 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1672 .setDiscardResult(true);
1673 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1674 Chain = pair.second;
1675 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
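  // If the requested alignment exceeds the stack alignment, round the returned
  // stack top up to it: Result = (Result + Align - 1) & ~(Align - 1).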
1676 if (NeedsAlign) {
1677 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1678 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1679 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1680 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1681 }
1682 // Chain = Result.getValue(1);
1683 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
1684 DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
1685
1686 SDValue Ops[2] = {Result, Chain};
1687 return DAG.getMergeValues(Ops, DL);
1688 }
1689
1690 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1691 SelectionDAG &DAG) const {
1692 SDLoc DL(Op);
1693 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1694 Op.getOperand(1));
1695 }
1696
1697 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1698 SelectionDAG &DAG) const {
1699 SDLoc DL(Op);
1700 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1701 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1702 Op.getOperand(1));
1703 }
1704
1705 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1706 SelectionDAG &DAG) const {
1707 SDLoc DL(Op);
1708 return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1709 Op.getOperand(0));
1710 }
1711
1712 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1713 const VETargetLowering &TLI,
1714 const VESubtarget *Subtarget) {
1715 SDLoc DL(Op);
1716 MachineFunction &MF = DAG.getMachineFunction();
1717 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1718
1719 MachineFrameInfo &MFI = MF.getFrameInfo();
1720 MFI.setFrameAddressIsTaken(true);
1721
1722 unsigned Depth = Op.getConstantOperandVal(0);
1723 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1724 Register FrameReg = RegInfo->getFrameRegister(MF);
1725 SDValue FrameAddr =
1726 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1727 while (Depth--)
1728 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1729 FrameAddr, MachinePointerInfo());
1730 return FrameAddr;
1731 }
1732
1733 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1734 const VETargetLowering &TLI,
1735 const VESubtarget *Subtarget) {
1736 MachineFunction &MF = DAG.getMachineFunction();
1737 MachineFrameInfo &MFI = MF.getFrameInfo();
1738 MFI.setReturnAddressIsTaken(true);
1739
1740 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1741 return SDValue();
1742
1743 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1744
1745 SDLoc DL(Op);
1746 EVT VT = Op.getValueType();
1747 SDValue Offset = DAG.getConstant(8, DL, VT);
1748 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1749 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1750 MachinePointerInfo());
1751 }
1752
1753 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1754 SelectionDAG &DAG) const {
1755 SDLoc DL(Op);
1756 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1757 switch (IntNo) {
1758 default: // Don't custom lower most intrinsics.
1759 return SDValue();
1760 case Intrinsic::eh_sjlj_lsda: {
1761 MachineFunction &MF = DAG.getMachineFunction();
1762 MVT VT = Op.getSimpleValueType();
1763 const VETargetMachine *TM =
1764 static_cast<const VETargetMachine *>(&DAG.getTarget());
1765
1766     // Create the GCC_except_tableXX string. The real symbol for it will be
1767     // generated in EHStreamer::emitExceptionTable() later, so we just
1768     // borrow its name here.
1769 TM->getStrList()->push_back(std::string(
1770 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1771 SDValue Addr =
1772 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1773 if (isPositionIndependent()) {
1774 Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1775 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1776 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1777 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1778 }
1779 return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1780 }
1781 }
1782 }
1783
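// Return true (and set UniqueIdx) when the BUILD_VECTOR node N has exactly one
// non-undef operand; lowerBUILD_VECTOR turns that case into a single
// INSERT_VECTOR_ELT.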
1784 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1785 if (!isa<BuildVectorSDNode>(N))
1786 return false;
1787 const auto *BVN = cast<BuildVectorSDNode>(N);
1788
1789 // Find first non-undef insertion.
1790 unsigned Idx;
1791 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1792 auto ElemV = BVN->getOperand(Idx);
1793 if (!ElemV->isUndef())
1794 break;
1795 }
1796 // Catch the (hypothetical) all-undef case.
1797 if (Idx == BVN->getNumOperands())
1798 return false;
1799 // Remember insertion.
1800 UniqueIdx = Idx++;
1801 // Verify that all other insertions are undef.
1802 for (; Idx < BVN->getNumOperands(); ++Idx) {
1803 auto ElemV = BVN->getOperand(Idx);
1804 if (!ElemV->isUndef())
1805 return false;
1806 }
1807 return true;
1808 }
1809
1810 static SDValue getSplatValue(SDNode *N) {
1811 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1812 return BuildVec->getSplatValue();
1813 }
1814 return SDValue();
1815 }
1816
1817 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1818 SelectionDAG &DAG) const {
1819 VECustomDAG CDAG(DAG, Op);
1820 MVT ResultVT = Op.getSimpleValueType();
1821
1822 // If there is just one element, expand to INSERT_VECTOR_ELT.
1823 unsigned UniqueIdx;
1824 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1825 SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1826 auto ElemV = Op->getOperand(UniqueIdx);
1827 SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1828 return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1829 }
1830
1831 // Else emit a broadcast.
1832 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1833 unsigned NumEls = ResultVT.getVectorNumElements();
1834 auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1835 return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1836 }
1837
1838 // Expand
1839 return SDValue();
1840 }
1841
1842 TargetLowering::LegalizeAction
1843 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1844   // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1845   // these operations (transform nodes such that their AVL parameter refers to
1846   // packs of 64 bits instead of the number of elements).
1847
1848 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1849 // re-visit them.
1850 if (isPackingSupportOpcode(Op.getOpcode()))
1851 return Legal;
1852
1853 // Custom lower to legalize AVL for packed mode.
1854 if (isVVPOrVEC(Op.getOpcode()))
1855 return Custom;
1856 return Legal;
1857 }
1858
1859 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1860 LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
1861 unsigned Opcode = Op.getOpcode();
1862
1863 /// Scalar isel.
1864 switch (Opcode) {
1865 case ISD::ATOMIC_FENCE:
1866 return lowerATOMIC_FENCE(Op, DAG);
1867 case ISD::ATOMIC_SWAP:
1868 return lowerATOMIC_SWAP(Op, DAG);
1869 case ISD::BlockAddress:
1870 return lowerBlockAddress(Op, DAG);
1871 case ISD::ConstantPool:
1872 return lowerConstantPool(Op, DAG);
1873 case ISD::DYNAMIC_STACKALLOC:
1874 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1875 case ISD::EH_SJLJ_LONGJMP:
1876 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1877 case ISD::EH_SJLJ_SETJMP:
1878 return lowerEH_SJLJ_SETJMP(Op, DAG);
1879 case ISD::EH_SJLJ_SETUP_DISPATCH:
1880 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1881 case ISD::FRAMEADDR:
1882 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1883 case ISD::GlobalAddress:
1884 return lowerGlobalAddress(Op, DAG);
1885 case ISD::GlobalTLSAddress:
1886 return lowerGlobalTLSAddress(Op, DAG);
1887 case ISD::INTRINSIC_WO_CHAIN:
1888 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1889 case ISD::JumpTable:
1890 return lowerJumpTable(Op, DAG);
1891 case ISD::LOAD:
1892 return lowerLOAD(Op, DAG);
1893 case ISD::RETURNADDR:
1894 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1895 case ISD::BUILD_VECTOR:
1896 return lowerBUILD_VECTOR(Op, DAG);
1897 case ISD::STORE:
1898 return lowerSTORE(Op, DAG);
1899 case ISD::VASTART:
1900 return lowerVASTART(Op, DAG);
1901 case ISD::VAARG:
1902 return lowerVAARG(Op, DAG);
1903
1904 case ISD::INSERT_VECTOR_ELT:
1905 return lowerINSERT_VECTOR_ELT(Op, DAG);
1906 case ISD::EXTRACT_VECTOR_ELT:
1907 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1908 }
1909
1910 /// Vector isel.
1911 LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
1912 if (ISD::isVPOpcode(Opcode))
1913 return lowerToVVP(Op, DAG);
1914
1915 switch (Opcode) {
1916 default:
1917 llvm_unreachable("Should not custom lower this!");
1918
1919 // Legalize the AVL of this internal node.
1920 case VEISD::VEC_BROADCAST:
1921 #define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1922 #include "VVPNodes.def"
1923 // AVL already legalized.
1924 if (getAnnotatedNodeAVL(Op).second)
1925 return Op;
1926 return legalizeInternalVectorOp(Op, DAG);
1927
1928 // Translate into a VEC_*/VVP_* layer operation.
1929 case ISD::MLOAD:
1930 case ISD::MSTORE:
1931 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1932 #include "VVPNodes.def"
1933 if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1934 return splitMaskArithmetic(Op, DAG);
1935 return lowerToVVP(Op, DAG);
1936 }
1937 }
1938 /// } Custom Lower
1939
1940 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1941 SmallVectorImpl<SDValue> &Results,
1942 SelectionDAG &DAG) const {
1943 switch (N->getOpcode()) {
1944 case ISD::ATOMIC_SWAP:
1945 // Let LLVM expand atomic swap instruction through LowerOperation.
1946 return;
1947 default:
1948 LLVM_DEBUG(N->dumpr(&DAG));
1949 llvm_unreachable("Do not know how to custom type legalize this operation!");
1950 }
1951 }
1952
1953 /// JumpTable for VE.
1954 ///
1955 /// VE cannot use relocatable symbols in jump tables. In particular, it cannot
1956 /// generate expressions that mix symbols from the text segment and the data
1957 /// segment, like below.
1958 ///      .4byte  .LBB0_2-.LJTI0_0
1959 /// So we instead generate an offset from the top of the function as a custom
1960 /// label, like below.
1961 /// .4byte .LBB0_2-<function name>
1962
1963 unsigned VETargetLowering::getJumpTableEncoding() const {
1964 // Use custom label for PIC.
1965 if (isPositionIndependent())
1966 return MachineJumpTableInfo::EK_Custom32;
1967
1968 // Otherwise, use the normal jump table encoding heuristics.
1969 return TargetLowering::getJumpTableEncoding();
1970 }
1971
1972 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1973 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1974 unsigned Uid, MCContext &Ctx) const {
1975 assert(isPositionIndependent());
1976
1977 // Generate custom label for PIC like below.
1978 // .4bytes .LBB0_2-<function name>
1979 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1980 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1981 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1982 return MCBinaryExpr::createSub(Value, Base, Ctx);
1983 }
1984
1985 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1986 SelectionDAG &DAG) const {
1987 assert(isPositionIndependent());
1988 SDLoc DL(Table);
1989 Function *Function = &DAG.getMachineFunction().getFunction();
1990 assert(Function != nullptr);
1991 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
1992
1993   // In PIC mode, the jump table holds values of the following form.
1994   //    .4bytes  .LBB0_2-<function name>
1995   // We need to add this value to the address of this function to compute the
1996   // .LBB0_2 label correctly under PIC mode.  So, we want to generate the
1997   // following instructions:
1998 // lea %reg, fun@gotoff_lo
1999 // and %reg, %reg, (32)0
2000 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2001   // In order to do so, we need to generate correctly marked DAG nodes using
2002   // makeHiLoPair.
2003 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2004 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
2005 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2006 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2007 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2008 }
2009
2010 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2011 MachineBasicBlock::iterator I,
2012 MachineBasicBlock *TargetBB,
2013 const DebugLoc &DL) const {
2014 MachineFunction *MF = MBB.getParent();
2015 MachineRegisterInfo &MRI = MF->getRegInfo();
2016 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2017
2018 const TargetRegisterClass *RC = &VE::I64RegClass;
2019 Register Tmp1 = MRI.createVirtualRegister(RC);
2020 Register Tmp2 = MRI.createVirtualRegister(RC);
2021 Register Result = MRI.createVirtualRegister(RC);
2022
2023 if (isPositionIndependent()) {
2024     // Create the following instructions for local linkage PIC code.
2025 // lea %Tmp1, TargetBB@gotoff_lo
2026 // and %Tmp2, %Tmp1, (32)0
2027 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2028 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2029 .addImm(0)
2030 .addImm(0)
2031 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2032 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2033 .addReg(Tmp1, getKillRegState(true))
2034 .addImm(M0(32));
2035 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2036 .addReg(VE::SX15)
2037 .addReg(Tmp2, getKillRegState(true))
2038 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2039 } else {
2040     // Create the following instructions for non-PIC code.
2041 // lea %Tmp1, TargetBB@lo
2042 // and %Tmp2, %Tmp1, (32)0
2043 // lea.sl %Result, TargetBB@hi(%Tmp2)
2044 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2045 .addImm(0)
2046 .addImm(0)
2047 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2048 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2049 .addReg(Tmp1, getKillRegState(true))
2050 .addImm(M0(32));
2051 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2052 .addReg(Tmp2, getKillRegState(true))
2053 .addImm(0)
2054 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2055 }
2056 return Result;
2057 }
2058
2059 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2060 MachineBasicBlock::iterator I,
2061 StringRef Symbol, const DebugLoc &DL,
2062 bool IsLocal = false,
2063 bool IsCall = false) const {
2064 MachineFunction *MF = MBB.getParent();
2065 MachineRegisterInfo &MRI = MF->getRegInfo();
2066 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2067
2068 const TargetRegisterClass *RC = &VE::I64RegClass;
2069 Register Result = MRI.createVirtualRegister(RC);
2070
2071 if (isPositionIndependent()) {
2072 if (IsCall && !IsLocal) {
2073       // Create the following instructions for non-local linkage PIC function
2074       // calls. These instructions use the IC and the magic number -24, so we
2075       // expand them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
2076 // lea %Reg, Symbol@plt_lo(-24)
2077 // and %Reg, %Reg, (32)0
2078 // sic %s16
2079 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2080 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2081 .addExternalSymbol("abort");
2082 } else if (IsLocal) {
2083 Register Tmp1 = MRI.createVirtualRegister(RC);
2084 Register Tmp2 = MRI.createVirtualRegister(RC);
2085       // Create the following instructions for local linkage PIC code.
2086 // lea %Tmp1, Symbol@gotoff_lo
2087 // and %Tmp2, %Tmp1, (32)0
2088 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2089 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2090 .addImm(0)
2091 .addImm(0)
2092 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2093 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2094 .addReg(Tmp1, getKillRegState(true))
2095 .addImm(M0(32));
2096 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2097 .addReg(VE::SX15)
2098 .addReg(Tmp2, getKillRegState(true))
2099 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2100 } else {
2101 Register Tmp1 = MRI.createVirtualRegister(RC);
2102 Register Tmp2 = MRI.createVirtualRegister(RC);
2103       // Create the following instructions for non-local linkage PIC code.
2104 // lea %Tmp1, Symbol@got_lo
2105 // and %Tmp2, %Tmp1, (32)0
2106 // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2107 // ld %Result, 0(%Tmp3)
2108 Register Tmp3 = MRI.createVirtualRegister(RC);
2109 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2110 .addImm(0)
2111 .addImm(0)
2112 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2113 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2114 .addReg(Tmp1, getKillRegState(true))
2115 .addImm(M0(32));
2116 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2117 .addReg(VE::SX15)
2118 .addReg(Tmp2, getKillRegState(true))
2119 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2120 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2121 .addReg(Tmp3, getKillRegState(true))
2122 .addImm(0)
2123 .addImm(0);
2124 }
2125 } else {
2126 Register Tmp1 = MRI.createVirtualRegister(RC);
2127 Register Tmp2 = MRI.createVirtualRegister(RC);
2128     // Create the following instructions for non-PIC code.
2129 // lea %Tmp1, Symbol@lo
2130 // and %Tmp2, %Tmp1, (32)0
2131 // lea.sl %Result, Symbol@hi(%Tmp2)
2132 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2133 .addImm(0)
2134 .addImm(0)
2135 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2136 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2137 .addReg(Tmp1, getKillRegState(true))
2138 .addImm(M0(32));
2139 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2140 .addReg(Tmp2, getKillRegState(true))
2141 .addImm(0)
2142 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2143 }
2144 return Result;
2145 }
2146
2147 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2148 MachineBasicBlock *MBB,
2149 MachineBasicBlock *DispatchBB,
2150 int FI, int Offset) const {
2151 DebugLoc DL = MI.getDebugLoc();
2152 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2153
2154 Register LabelReg =
2155 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2156
2157   // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2158   // IC that a later longjmp (throw) will jump to.
2159 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2160 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2161 MIB.addReg(LabelReg, getKillRegState(true));
2162 }
2163
2164 MachineBasicBlock *
2165 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2166 MachineBasicBlock *MBB) const {
2167 DebugLoc DL = MI.getDebugLoc();
2168 MachineFunction *MF = MBB->getParent();
2169 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2170 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2171 MachineRegisterInfo &MRI = MF->getRegInfo();
2172
2173 const BasicBlock *BB = MBB->getBasicBlock();
2174 MachineFunction::iterator I = ++MBB->getIterator();
2175
2176 // Memory Reference.
2177 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2178 MI.memoperands_end());
2179 Register BufReg = MI.getOperand(1).getReg();
2180
2181 Register DstReg;
2182
2183 DstReg = MI.getOperand(0).getReg();
2184 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2185 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2186 (void)TRI;
2187 Register MainDestReg = MRI.createVirtualRegister(RC);
2188 Register RestoreDestReg = MRI.createVirtualRegister(RC);
2189
2190   // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2191 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2192 //
2193 // ThisMBB:
2194 // buf[3] = %s17 iff %s17 is used as BP
2195 // buf[1] = RestoreMBB as IC after longjmp
2196 // # SjLjSetup RestoreMBB
2197 //
2198 // MainMBB:
2199 // v_main = 0
2200 //
2201 // SinkMBB:
2202 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2203 // ...
2204 //
2205 // RestoreMBB:
2206 // %s17 = buf[3] = iff %s17 is used as BP
2207 // v_restore = 1
2208 // goto SinkMBB
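  //
  // The jmpbuf layout assumed by this code (8-byte slots) is:
  //   buf[0] = FP, buf[1] = IC (restore target), buf[2] = SP, buf[3] = BP.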
2209
2210 MachineBasicBlock *ThisMBB = MBB;
2211 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2212 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2213 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2214 MF->insert(I, MainMBB);
2215 MF->insert(I, SinkMBB);
2216 MF->push_back(RestoreMBB);
2217 RestoreMBB->setHasAddressTaken();
2218
2219 // Transfer the remainder of BB and its successor edges to SinkMBB.
2220 SinkMBB->splice(SinkMBB->begin(), MBB,
2221 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2222 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2223
2224 // ThisMBB:
2225 Register LabelReg =
2226 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2227
2228 // Store BP in buf[3] iff this function is using BP.
2229 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2230 if (TFI->hasBP(*MF)) {
2231 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2232 MIB.addReg(BufReg);
2233 MIB.addImm(0);
2234 MIB.addImm(24);
2235 MIB.addReg(VE::SX17);
2236 MIB.setMemRefs(MMOs);
2237 }
2238
2239 // Store IP in buf[1].
2240 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2241 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2242 MIB.addImm(0);
2243 MIB.addImm(8);
2244 MIB.addReg(LabelReg, getKillRegState(true));
2245 MIB.setMemRefs(MMOs);
2246
2247 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2248
2249 // Insert setup.
2250 MIB =
2251 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2252
2253 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2254 MIB.addRegMask(RegInfo->getNoPreservedMask());
2255 ThisMBB->addSuccessor(MainMBB);
2256 ThisMBB->addSuccessor(RestoreMBB);
2257
2258 // MainMBB:
2259 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2260 .addImm(0)
2261 .addImm(0)
2262 .addImm(0);
2263 MainMBB->addSuccessor(SinkMBB);
2264
2265 // SinkMBB:
2266 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2267 .addReg(MainDestReg)
2268 .addMBB(MainMBB)
2269 .addReg(RestoreDestReg)
2270 .addMBB(RestoreMBB);
2271
2272 // RestoreMBB:
2273 // Restore BP from buf[3] iff this function is using BP. The address of
2274 // buf is in SX10.
2275 // FIXME: Better to not use SX10 here
2276 if (TFI->hasBP(*MF)) {
2277 MachineInstrBuilder MIB =
2278 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2279 MIB.addReg(VE::SX10);
2280 MIB.addImm(0);
2281 MIB.addImm(24);
2282 MIB.setMemRefs(MMOs);
2283 }
2284 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2285 .addImm(0)
2286 .addImm(0)
2287 .addImm(1);
2288 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2289 RestoreMBB->addSuccessor(SinkMBB);
2290
2291 MI.eraseFromParent();
2292 return SinkMBB;
2293 }
2294
2295 MachineBasicBlock *
2296 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2297 MachineBasicBlock *MBB) const {
2298 DebugLoc DL = MI.getDebugLoc();
2299 MachineFunction *MF = MBB->getParent();
2300 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2301 MachineRegisterInfo &MRI = MF->getRegInfo();
2302
2303 // Memory Reference.
2304 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2305 MI.memoperands_end());
2306 Register BufReg = MI.getOperand(0).getReg();
2307
2308 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2309 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2310 Register FP = VE::SX9;
2311 Register SP = VE::SX11;
2312
2313 MachineInstrBuilder MIB;
2314
2315 MachineBasicBlock *ThisMBB = MBB;
2316
2317   // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2318 //
2319 // ThisMBB:
2320 // %fp = load buf[0]
2321 // %jmp = load buf[1]
2322 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2323 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2324 // jmp %jmp
2325
2326 // Reload FP.
2327 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2328 MIB.addReg(BufReg);
2329 MIB.addImm(0);
2330 MIB.addImm(0);
2331 MIB.setMemRefs(MMOs);
2332
2333 // Reload IP.
2334 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2335 MIB.addReg(BufReg);
2336 MIB.addImm(0);
2337 MIB.addImm(8);
2338 MIB.setMemRefs(MMOs);
2339
2340 // Copy BufReg to SX10 for later use in setjmp.
2341 // FIXME: Better to not use SX10 here
2342 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2343 .addReg(BufReg)
2344 .addImm(0);
2345
2346 // Reload SP.
2347 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2348 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2349 MIB.addImm(0);
2350 MIB.addImm(16);
2351 MIB.setMemRefs(MMOs);
2352
2353 // Jump.
2354 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2355 .addReg(Tmp, getKillRegState(true))
2356 .addImm(0);
2357
2358 MI.eraseFromParent();
2359 return ThisMBB;
2360 }
2361
2362 MachineBasicBlock *
2363 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2364 MachineBasicBlock *BB) const {
2365 DebugLoc DL = MI.getDebugLoc();
2366 MachineFunction *MF = BB->getParent();
2367 MachineFrameInfo &MFI = MF->getFrameInfo();
2368 MachineRegisterInfo &MRI = MF->getRegInfo();
2369 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2370 int FI = MFI.getFunctionContextIndex();
2371
2372 // Get a mapping of the call site numbers to all of the landing pads they're
2373 // associated with.
2374 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2375 unsigned MaxCSNum = 0;
2376 for (auto &MBB : *MF) {
2377 if (!MBB.isEHPad())
2378 continue;
2379
2380 MCSymbol *Sym = nullptr;
2381 for (const auto &MI : MBB) {
2382 if (MI.isDebugInstr())
2383 continue;
2384
2385 assert(MI.isEHLabel() && "expected EH_LABEL");
2386 Sym = MI.getOperand(0).getMCSymbol();
2387 break;
2388 }
2389
2390 if (!MF->hasCallSiteLandingPad(Sym))
2391 continue;
2392
2393 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2394 CallSiteNumToLPad[CSI].push_back(&MBB);
2395 MaxCSNum = std::max(MaxCSNum, CSI);
2396 }
2397 }
2398
2399 // Get an ordered list of the machine basic blocks for the jump table.
2400 std::vector<MachineBasicBlock *> LPadList;
2401 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2402 LPadList.reserve(CallSiteNumToLPad.size());
2403
2404 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2405 for (auto &LP : CallSiteNumToLPad[CSI]) {
2406 LPadList.push_back(LP);
2407 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2408 }
2409 }
2410
2411 assert(!LPadList.empty() &&
2412 "No landing pad destinations for the dispatch jump table!");
2413
2414 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2415 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2416 //
2417 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2418 // First `i64` is callsite, so callsite is FI+8.
2419 static const int OffsetIC = 72;
2420 static const int OffsetCS = 8;
2421
2422   // Create the MBBs for the dispatch code like the following:
2423 //
2424 // ThisMBB:
2425 // Prepare DispatchBB address and store it to buf[1].
2426 // ...
2427 //
2428 // DispatchBB:
2429 // %s15 = GETGOT iff isPositionIndependent
2430 // %callsite = load callsite
2431 // brgt.l.t #size of callsites, %callsite, DispContBB
2432 //
2433 // TrapBB:
2434 // Call abort.
2435 //
2436 // DispContBB:
2437 // %breg = address of jump table
2438 // %pc = load and calculate next pc from %breg and %callsite
2439 // jmp %pc
2440
2441 // Shove the dispatch's address into the return slot in the function context.
2442 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2443 DispatchBB->setIsEHPad(true);
2444
2445   // TrapBB causes a trap like `assert(0)`.
2446 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2447 DispatchBB->addSuccessor(TrapBB);
2448
2449 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2450 DispatchBB->addSuccessor(DispContBB);
2451
2452 // Insert MBBs.
2453 MF->push_back(DispatchBB);
2454 MF->push_back(DispContBB);
2455 MF->push_back(TrapBB);
2456
2457 // Insert code to call abort in the TrapBB.
2458 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2459 /* Local */ false, /* Call */ true);
2460 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2461 .addReg(Abort, getKillRegState(true))
2462 .addImm(0)
2463 .addImm(0);
2464
2465 // Insert code into the entry block that creates and registers the function
2466 // context.
2467 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2468
2469 // Create the jump table and associated information
2470 unsigned JTE = getJumpTableEncoding();
2471 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2472 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2473
2474 const VERegisterInfo &RI = TII->getRegisterInfo();
2475 // Add a register mask with no preserved registers. This results in all
2476 // registers being marked as clobbered.
2477 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2478 .addRegMask(RI.getNoPreservedMask());
2479
2480 if (isPositionIndependent()) {
2481     // Force generation of GETGOT, since the current implementation doesn't
2482     // store the GOT register.
2483 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2484 }
2485
2486 // IReg is used as an index in a memory operand and therefore can't be SP
2487 const TargetRegisterClass *RC = &VE::I64RegClass;
2488 Register IReg = MRI.createVirtualRegister(RC);
2489 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2490 OffsetCS);
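  // Compare the call-site index against the number of landing pads; anything
  // out of range branches to TrapBB. The immediate compare-and-branch form is
  // only used for small tables, presumably because the branch immediate field
  // only holds a small signed constant (hence the < 64 check).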
2491 if (LPadList.size() < 64) {
2492 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2493 .addImm(VECC::CC_ILE)
2494 .addImm(LPadList.size())
2495 .addReg(IReg)
2496 .addMBB(TrapBB);
2497 } else {
2498 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2499 Register TmpReg = MRI.createVirtualRegister(RC);
2500 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2501 .addImm(0)
2502 .addImm(0)
2503 .addImm(LPadList.size());
2504 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2505 .addImm(VECC::CC_ILE)
2506 .addReg(TmpReg, getKillRegState(true))
2507 .addReg(IReg)
2508 .addMBB(TrapBB);
2509 }
2510
2511 Register BReg = MRI.createVirtualRegister(RC);
2512 Register Tmp1 = MRI.createVirtualRegister(RC);
2513 Register Tmp2 = MRI.createVirtualRegister(RC);
2514
2515 if (isPositionIndependent()) {
2516     // Create the following instructions for local linkage PIC code.
2517 // lea %Tmp1, .LJTI0_0@gotoff_lo
2518 // and %Tmp2, %Tmp1, (32)0
2519 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2520 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2521 .addImm(0)
2522 .addImm(0)
2523 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2524 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2525 .addReg(Tmp1, getKillRegState(true))
2526 .addImm(M0(32));
2527 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2528 .addReg(VE::SX15)
2529 .addReg(Tmp2, getKillRegState(true))
2530 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2531 } else {
2532     // Create the following instructions for non-PIC code.
2533 // lea %Tmp1, .LJTI0_0@lo
2534 // and %Tmp2, %Tmp1, (32)0
2535 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2536 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2537 .addImm(0)
2538 .addImm(0)
2539 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2540 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2541 .addReg(Tmp1, getKillRegState(true))
2542 .addImm(M0(32));
2543 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2544 .addReg(Tmp2, getKillRegState(true))
2545 .addImm(0)
2546 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2547 }
2548
2549 switch (JTE) {
2550 case MachineJumpTableInfo::EK_BlockAddress: {
2551 // Generate simple block address code for no-PIC model.
2552 // sll %Tmp1, %IReg, 3
2553 // lds %TReg, 0(%Tmp1, %BReg)
2554 // bcfla %TReg
2555
2556 Register TReg = MRI.createVirtualRegister(RC);
2557 Register Tmp1 = MRI.createVirtualRegister(RC);
2558
2559 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2560 .addReg(IReg, getKillRegState(true))
2561 .addImm(3);
2562 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2563 .addReg(BReg, getKillRegState(true))
2564 .addReg(Tmp1, getKillRegState(true))
2565 .addImm(0);
2566 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2567 .addReg(TReg, getKillRegState(true))
2568 .addImm(0);
2569 break;
2570 }
2571 case MachineJumpTableInfo::EK_Custom32: {
2572 // Generate block address code using differences from the function pointer
2573 // for PIC model.
2574 // sll %Tmp1, %IReg, 2
2575 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2576 // Prepare function address in BReg2.
2577 // adds.l %TReg, %BReg2, %OReg
2578 // bcfla %TReg
2579
2580 assert(isPositionIndependent());
2581 Register OReg = MRI.createVirtualRegister(RC);
2582 Register TReg = MRI.createVirtualRegister(RC);
2583 Register Tmp1 = MRI.createVirtualRegister(RC);
2584
2585 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2586 .addReg(IReg, getKillRegState(true))
2587 .addImm(2);
2588 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2589 .addReg(BReg, getKillRegState(true))
2590 .addReg(Tmp1, getKillRegState(true))
2591 .addImm(0);
2592 Register BReg2 =
2593 prepareSymbol(*DispContBB, DispContBB->end(),
2594 DispContBB->getParent()->getName(), DL, /* Local */ true);
2595 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2596 .addReg(OReg, getKillRegState(true))
2597 .addReg(BReg2, getKillRegState(true));
2598 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2599 .addReg(TReg, getKillRegState(true))
2600 .addImm(0);
2601 break;
2602 }
2603 default:
2604 llvm_unreachable("Unexpected jump table encoding");
2605 }
2606
2607 // Add the jump table entries as successors to the MBB.
2608 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2609 for (auto &LP : LPadList)
2610 if (SeenMBBs.insert(LP).second)
2611 DispContBB->addSuccessor(LP);
2612
2613 // N.B. the order the invoke BBs are processed in doesn't matter here.
2614 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2615 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2616 for (MachineBasicBlock *MBB : InvokeBBs) {
2617 // Remove the landing pad successor from the invoke block and replace it
2618 // with the new dispatch block.
2619 // Keep a copy of Successors since it's modified inside the loop.
2620 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2621 MBB->succ_rend());
2622 // FIXME: Avoid quadratic complexity.
2623 for (auto MBBS : Successors) {
2624 if (MBBS->isEHPad()) {
2625 MBB->removeSuccessor(MBBS);
2626 MBBLPads.push_back(MBBS);
2627 }
2628 }
2629
2630 MBB->addSuccessor(DispatchBB);
2631
2632 // Find the invoke call and mark all of the callee-saved registers as
2633 // 'implicit defined' so that they're spilled. This prevents code from
2634 // moving instructions to before the EH block, where they will never be
2635 // executed.
2636 for (auto &II : reverse(*MBB)) {
2637 if (!II.isCall())
2638 continue;
2639
2640 DenseMap<Register, bool> DefRegs;
2641 for (auto &MOp : II.operands())
2642 if (MOp.isReg())
2643 DefRegs[MOp.getReg()] = true;
2644
2645 MachineInstrBuilder MIB(*MF, &II);
2646 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2647 Register Reg = SavedRegs[RI];
2648 if (!DefRegs[Reg])
2649 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2650 }
2651
2652 break;
2653 }
2654 }
2655
2656 // Mark all former landing pads as non-landing pads. The dispatch is the only
2657 // landing pad now.
2658 for (auto &LP : MBBLPads)
2659 LP->setIsEHPad(false);
2660
2661 // The instruction is gone now.
2662 MI.eraseFromParent();
2663 return BB;
2664 }
2665
2666 MachineBasicBlock *
2667 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2668 MachineBasicBlock *BB) const {
2669 switch (MI.getOpcode()) {
2670 default:
2671 llvm_unreachable("Unknown Custom Instruction!");
2672 case VE::EH_SjLj_LongJmp:
2673 return emitEHSjLjLongJmp(MI, BB);
2674 case VE::EH_SjLj_SetJmp:
2675 return emitEHSjLjSetJmp(MI, BB);
2676 case VE::EH_SjLj_Setup_Dispatch:
2677 return emitSjLjDispatchBlock(MI, BB);
2678 }
2679 }
2680
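// Return true when User only observes the low 32 bits of its operand N, so the
// TRUNCATE feeding it can be replaced by a plain sub_i32 subregister
// extraction (see combineTRUNCATE below).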
2681 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2682 switch (User->getOpcode()) {
2683 default:
2684 return false;
2685 case ISD::ADD:
2686 case ISD::SUB:
2687 case ISD::MUL:
2688 case ISD::SDIV:
2689 case ISD::UDIV:
2690 case ISD::SETCC:
2691 case ISD::SMIN:
2692 case ISD::SMAX:
2693 case ISD::SHL:
2694 case ISD::SRA:
2695 case ISD::BSWAP:
2696 case ISD::SINT_TO_FP:
2697 case ISD::UINT_TO_FP:
2698 case ISD::BR_CC:
2699 case ISD::BITCAST:
2700 case ISD::ATOMIC_CMP_SWAP:
2701 case ISD::ATOMIC_SWAP:
2702 return true;
2703 case ISD::SRL:
2704 if (N->getOperand(0).getOpcode() != ISD::SRL)
2705 return true;
2706     // (srl (trunc (srl ...))) may be optimized by combining the srls, so
2707     // we don't optimize the trunc for now.
2708 return false;
2709 case ISD::SELECT_CC:
2710 if (User->getOperand(2).getNode() != N &&
2711 User->getOperand(3).getNode() != N)
2712 return true;
2713 LLVM_FALLTHROUGH;
2714 case ISD::AND:
2715 case ISD::OR:
2716 case ISD::XOR:
2717 case ISD::SELECT:
2718 case ISD::CopyToReg:
2719     // Check all uses of selections, bit operations, and copies. If all of
2720     // them are safe, optimize the truncate to an extract_subreg.
2721 for (const SDNode *U : User->uses()) {
2722 switch (U->getOpcode()) {
2723 default:
2724         // If the use is an instruction which treats the source operand as i32,
2725         // it is safe to avoid the truncate here.
2726 if (isI32Insn(U, N))
2727 continue;
2728 break;
2729 case ISD::ANY_EXTEND:
2730 case ISD::SIGN_EXTEND:
2731 case ISD::ZERO_EXTEND: {
2732         // Special optimization for the combination of ext and trunc.
2733         // (ext ... (select ... (trunc ...))) is safe to avoid the truncate
2734         // here, since the truncate clears the upper 32 bits, which are later
2735         // filled in by one of the ext instructions.
2736         assert(N->getValueType(0) == MVT::i32 &&
2737                "found truncate to non-i32 integer");
2738 if (User->getOpcode() == ISD::SELECT_CC ||
2739 User->getOpcode() == ISD::SELECT)
2740 continue;
2741 break;
2742 }
2743 }
2744 return false;
2745 }
2746 return true;
2747 }
2748 }
2749
2750 // Optimize TRUNCATE in DAG combining. Optimizing it in custom lowering is
2751 // sometimes too early, and optimizing it in DAG pattern matching in
2752 // VEInstrInfo.td is sometimes too late. So we do it here.
2753 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2754 DAGCombinerInfo &DCI) const {
2755 assert(N->getOpcode() == ISD::TRUNCATE &&
2756 "Should be called with a TRUNCATE node");
2757
2758 SelectionDAG &DAG = DCI.DAG;
2759 SDLoc DL(N);
2760 EVT VT = N->getValueType(0);
2761
2762 // We prefer to do this when all types are legal.
2763 if (!DCI.isAfterLegalizeDAG())
2764 return SDValue();
2765
2766   // Skip combining TRUNCATE for now if its operand might be a constant.
2767 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
2768 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
2769 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
2770 return SDValue();
2771
2772   // Check all uses of this TRUNCATE.
2773 for (const SDNode *User : N->uses()) {
2774     // Make sure that we're not going to replace TRUNCATE for non-i32
2775     // instructions.
2776 //
2777 // FIXME: Although we could sometimes handle this, and it does occur in
2778 // practice that one of the condition inputs to the select is also one of
2779 // the outputs, we currently can't deal with this.
2780 if (isI32Insn(User, N))
2781 continue;
2782
2783 return SDValue();
2784 }
2785
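  // Every user only looks at the low 32 bits, so the TRUNCATE can be replaced
  // by a sub_i32 subregister extraction, which typically lowers to no extra
  // instruction.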
2786 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2787 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
2788 N->getOperand(0), SubI32),
2789 0);
2790 }
2791
2792 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
2793 DAGCombinerInfo &DCI) const {
2794 switch (N->getOpcode()) {
2795 default:
2796 break;
2797 case ISD::TRUNCATE:
2798 return combineTRUNCATE(N, DCI);
2799 }
2800
2801 return SDValue();
2802 }
2803
2804 //===----------------------------------------------------------------------===//
2805 // VE Inline Assembly Support
2806 //===----------------------------------------------------------------------===//
2807
2808 VETargetLowering::ConstraintType
2809 VETargetLowering::getConstraintType(StringRef Constraint) const {
2810 if (Constraint.size() == 1) {
2811 switch (Constraint[0]) {
2812 default:
2813 break;
2814 case 'v': // vector registers
2815 return C_RegisterClass;
2816 }
2817 }
2818 return TargetLowering::getConstraintType(Constraint);
2819 }
2820
2821 std::pair<unsigned, const TargetRegisterClass *>
2822 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2823 StringRef Constraint,
2824 MVT VT) const {
2825 const TargetRegisterClass *RC = nullptr;
2826 if (Constraint.size() == 1) {
2827 switch (Constraint[0]) {
2828 default:
2829 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2830 case 'r':
2831 RC = &VE::I64RegClass;
2832 break;
2833 case 'v':
2834 RC = &VE::V64RegClass;
2835 break;
2836 }
2837 return std::make_pair(0U, RC);
2838 }
2839
2840 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2841 }
2842
2843 //===----------------------------------------------------------------------===//
2844 // VE Target Optimization Support
2845 //===----------------------------------------------------------------------===//
2846
2847 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
2848   // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
2849 if (isJumpTableRelative())
2850 return 8;
2851
2852 return TargetLowering::getMinimumJumpTableEntries();
2853 }
2854
2855 bool VETargetLowering::hasAndNot(SDValue Y) const {
2856 EVT VT = Y.getValueType();
2857
2858   // VE doesn't have a vector and-not instruction.
2859 if (VT.isVector())
2860 return false;
2861
2862   // VE allows different immediate forms for X and Y in ~X & Y: only simm7
2863   // works for X, and only mimm works for Y.  However, this function is used
2864   // to check whether an immediate value is OK for the and-not instruction as
2865   // both X and Y.  Generating an additional instruction to materialize an
2866   // immediate value is no good, since the purpose of this function is to
2867   // convert a series of 3 instructions into another series of 3 instructions
2868   // with better parallelism.  Therefore, we return false for all immediate
2869   // values for now.
2870 // FIXME: Change hasAndNot function to have two operands to make it work
2871 // correctly with Aurora VE.
2872 if (isa<ConstantSDNode>(Y))
2873 return false;
2874
2875 // It's ok for generic registers.
2876 return true;
2877 }
2878
2879 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2880 SelectionDAG &DAG) const {
2881 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
2882 MVT VT = Op.getOperand(0).getSimpleValueType();
2883
2884 // Special treatment for packed V64 types.
2885 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2886 (void)VT;
2887   // Example of the generated code:
2888 // %packed_v = extractelt %vr, %idx / 2
2889 // %v = %packed_v >> (%idx % 2 * 32)
2890 // %res = %v & 0xffffffff
2891
2892 SDValue Vec = Op.getOperand(0);
2893 SDValue Idx = Op.getOperand(1);
2894 SDLoc DL(Op);
2895 SDValue Result = Op;
2896 if (false /* Idx->isConstant() */) {
2897 // TODO: optimized implementation using constant values
2898 } else {
2899 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2900 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2901 SDValue PackedElt =
2902 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2903 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2904 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2905 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2906 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
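    // An even index selects the upper 32 bits of the packed element
    // (Shift == 32); an odd index selects the lower 32 bits (Shift == 0).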
2907 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
2908 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
2909 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2910 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2911 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
2912 MVT::i32, PackedElt, SubI32),
2913 0);
2914
2915 if (Op.getSimpleValueType() == MVT::f32) {
2916 Result = DAG.getBitcast(MVT::f32, Result);
2917 } else {
2918 assert(Op.getSimpleValueType() == MVT::i32);
2919 }
2920 }
2921 return Result;
2922 }
2923
2924 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2925 SelectionDAG &DAG) const {
2926 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
2927 MVT VT = Op.getOperand(0).getSimpleValueType();
2928
2929 // Special treatment for packed V64 types.
2930 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2931 (void)VT;
2932   // The v512i32 and v512f32 elements start from the upper bits (0..31). These
2933   // "upper bits" require `val << 32` from a C implementation's point of view.
2934 //
2935   // Example of the generated code:
2936 // %packed_elt = extractelt %vr, (%idx >> 1)
2937 // %shift = ((%idx & 1) ^ 1) << 5
2938 // %packed_elt &= 0xffffffff00000000 >> shift
2939 // %packed_elt |= (zext %val) << shift
2940 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
2941
2942 SDLoc DL(Op);
2943 SDValue Vec = Op.getOperand(0);
2944 SDValue Val = Op.getOperand(1);
2945 SDValue Idx = Op.getOperand(2);
2946 if (Idx.getSimpleValueType() == MVT::i32)
2947 Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
2948 if (Val.getSimpleValueType() == MVT::f32)
2949 Val = DAG.getBitcast(MVT::i32, Val);
2950 assert(Val.getSimpleValueType() == MVT::i32);
2951 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
2952
2953 SDValue Result = Op;
2954 if (false /* Idx->isConstant()*/) {
2955 // TODO: optimized implementation using constant values
2956 } else {
2957 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2958 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2959 SDValue PackedElt =
2960 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2961 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2962 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2963 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2964 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2965 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
2966 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
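    // Mask now selects the half of the packed element that is kept; the other
    // half is cleared below and replaced by Val shifted into position.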
2967 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2968 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
2969 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
2970 Result =
2971 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
2972 {HalfIdx, PackedElt, Vec}),
2973 0);
2974 }
2975 return Result;
2976 }
2977